Compare commits

..

46 Commits

Author SHA1 Message Date
Daniel Wagner
51dfd77f3c note a few more changes 2017-07-31 21:59:08 -07:00
Daniel Wagner
2e3e61a2b4 Merge remote-tracking branch 'ScottSedgwick/StackCompatibility'
Conflicts:
	encoding.cabal
2017-07-30 18:00:41 -07:00
Daniel Wagner
52aabc47cd mark upper and lower bounds on library dependencies 2017-07-30 17:40:22 -07:00
Daniel Wagner
8925f398af -fno-warn-tabs 2017-07-30 17:39:58 -07:00
Daniel Wagner
7adcda8547 record what's new 2017-07-30 12:31:30 -07:00
Daniel Wagner
59b81ad775 avoid using revisions
Hackage revisions are great, but since we've removed an outdated flag we
can't use them. We'll have to use an actual point release.
2017-07-30 12:28:46 -07:00
Scott Sedgwick
8727ac25a5 Made package stack compatible
Created and added stack.yaml and .gitignore files.
Relaxed the version dependency on 'binary' package in cabal file.  Is that OK?
Also brought the minimum cabal version to >=1.8, so I could add a test target that pulls in the library.
Changed all tabs to spaces - I don't know when the Haskell compiler started giving warnings about that.
2017-07-28 14:17:42 +10:00
Daniel Wagner
6284c1a677 fix up repository metadata 2017-07-15 16:11:47 -07:00
Daniel Wagner
f1a2889bfe fix a typo in the cabal file 2017-07-15 16:11:32 -07:00
Daniel Wagner
7c07f48a45 fix dependencies so it builds
Ignore-this: 1dcaab9390e2cd47b7c429620e697003

darcs-hash:20170715230208-7469c-dcf2ca84d39bd0ca519b47e744f353c05daed1f7
2017-07-15 16:02:08 -07:00
Daniel Wagner
4be65c2f13 bump version to 0.8.1
Ignore-this: 3a50200de9ccce5f0a648ef68615a0cf

darcs-hash:20160731183857-7469c-54bc23ccb38615550b3d4ead6fed2cdcf8e5d5b8
2016-07-31 11:38:57 -07:00
endhrk
63e17e9a22 Implemented CP932 and ShiftJIS encodings
Ignore-this: 67153b679dabd226a7b1fab3ff501453

darcs-hash:20160725070504-fb088-842c21423d713a81a8ada591dffdfbb9cd08b68c
2016-07-25 00:05:04 -07:00
Daniel Wagner
de78ca5f34 update CHANGELOG + bump tag number in cabal file
Ignore-this: ef232119c63c53dcce67c96d5eee0f5a

darcs-hash:20150206173636-7469c-34a56c0a5c25aac0498446a7af9f0eb2fe90035a
2015-02-06 09:36:36 -08:00
Daniel Wagner
2be6331521 bump dependencies and version number
Ignore-this: abdc3ce060ae830738fc026f33a590b2

darcs-hash:20150206173325-7469c-c1698de4dfca938fcf6fd20cbccaeb08951bdebc
2015-02-06 09:33:25 -08:00
Daniel Wagner
2e910834dc unbreak the build
Ignore-this: d1335751599d3d5e80360d415984e1b8

darcs-hash:20150206173311-7469c-f1074a310d4d296496989e0fcb900a1bf93ae996
2015-02-06 09:33:11 -08:00
ryan.trinkle
e5e64a794f Add Applicative instances for instances of Monad
Ignore-this: a95ef4a320b4c1506f5352716a656385

darcs-hash:20150116211446-5ff09-fca82019a01b124f7c93efe6f25ff73d1abfa665
2015-01-16 13:14:46 -08:00
Daniel Wagner
f679a9eb63 update RELEASING script to gzip at the same time as tar'ing and to deal with Hackage's no-GNU-tar restriction
Ignore-this: 6820ea6cb52138b3e39d579abb174d43

darcs-hash:20140707185311-7469c-6c9b30eb0ee847791c52573eb745fa23d5c76afd
2014-07-07 11:53:11 -07:00
Daniel Wagner
699abee92b the "NEWS" file is now called "CHANGELOG"; fix this in the "RELEASING" instructions
Ignore-this: 35ba119bdb111f91844a3c16ecf79e7b

darcs-hash:20140707174439-7469c-393e1047f758f9cbd10d88d6cb4003abc97fd37e
2014-07-07 10:44:39 -07:00
Daniel Wagner
4e53752d41 update CHANGELOG
Ignore-this: cd0ef07e4dde5f7dae052954b97ca7de

darcs-hash:20140707174356-7469c-6735a4a73a469a0286aa6e97765f26d88d287f09
2014-07-07 10:43:56 -07:00
Daniel Wagner
80e12d02f9 bump version to 0.7.0.2
Ignore-this: d5645215c8fb8f2d6e51d3e135fd8181

darcs-hash:20140707174202-7469c-123eb74d859455316033f1aeee37b2e0ab5cc158
2014-07-07 10:42:02 -07:00
Daniel Wagner
e6a388b038 amend MacOSRoman to match http://en.wikipedia.org/wiki/Mac_OS_Roman as suggested by Adam Bergmark
Ignore-this: 5c8f8abc665d16ac061cc8fd730b11ae

darcs-hash:20140707173911-7469c-7747c7710cdbbe489dde218eb72ab4e98683e70c
2014-07-07 10:39:11 -07:00
Daniel Wagner
ccdcf9c392 rename NEWS to CHANGELOG in the hopes that it will appear on Hackage
Ignore-this: 4fd873c2ae3f912e62d43aa94871d7cf

darcs-hash:20140525015849-7469c-92982613122be211f4748b94ddcb6f185dc599d6
2014-05-24 18:58:49 -07:00
Daniel Wagner
f565a7e82e note GHC-7.8 compat in NEWS
Ignore-this: d5a83c9d055f00229292d90d8be3d087

darcs-hash:20140525015814-7469c-99f480b54519121089924246e8f6c57c427d8ca0
2014-05-24 18:58:14 -07:00
Daniel Wagner
f81e1808ff bump version number to 0.7.0.1
Ignore-this: d038fd6703c313311cc7bafa1b85b7a3

darcs-hash:20140525015729-7469c-4bbb82545b83f2970aef6d8864db4d1ebd7ebdc0
2014-05-24 18:57:29 -07:00
Daniel Wagner
00f914ebde GHC 7.8 compatibility, based on a patch suggested by José Romildo Malaquias
Ignore-this: 4a655a3a1d2348054d2028f5c26dc0d5

darcs-hash:20140525015603-7469c-30d96daeffde6da775c2b8f6579c0a2fbbfc479a
2014-05-24 18:56:03 -07:00
Daniel Wagner
91f119bbfb update NEWS
Ignore-this: 2d6130b4b59737de15dcbcd0fe692517

darcs-hash:20140117023145-76d51-9cc6bbb709f03381dcf1f3a3ac64ce5aec94586b
2014-01-16 18:31:45 -08:00
Daniel Wagner
da883601cb relax dependencies on binary and HaXml
Ignore-this: e31192fece193a225c138ed779c08e79

darcs-hash:20140117023131-76d51-e66839c37479414aa543da736eacd39d082e13e5
2014-01-16 18:31:31 -08:00
Daniel Wagner
8b1f45a6ec binary-0.6 compatibility
Ignore-this: 6af2adadedc20f51bb5084b3da59724e

darcs-hash:20121213030806-76d51-6d52680cab9b4f4b6c2ba17e29fa457b85d4d838
2012-12-12 19:08:06 -08:00
Daniel Wagner
25d4551635 bump version to 0.6.7.2
Ignore-this: 1ad0071f34d63d2012b869a8cc8b1818

darcs-hash:20121124014853-76d51-1cb8ae120a4ff2f4cb017d76a0bbc69ce70fe3c3
2012-11-23 17:48:53 -08:00
Daniel Wagner
2a0fc9d7b8 write future me a warning about how to use the regression tester
Ignore-this: f51e3161e581a8e480101c7060d1ae77

darcs-hash:20121208023249-76d51-f071cee65b5ec2896ad229aa27bca87a9e479854
2012-12-07 18:32:49 -08:00
Daniel Wagner
d604ac7763 add a way to run regression tests
Ignore-this: 5a811f403a442afc19ef9ae2874ac4ae

darcs-hash:20121208023159-76d51-feb50029c8fa6fd30e43e627d1f996ab898f7649
2012-12-07 18:31:59 -08:00
Daniel Wagner
9da33cd371 add Ord and Read instances to the encoding/decoding exceptions
Ignore-this: 5ec11b8739b241f1cc2935f5f8e34bfb

darcs-hash:20121208023123-76d51-b27be713e80337673d35c2b749a52e964810b17b
2012-12-07 18:31:23 -08:00
Daniel Wagner
7d2f55ce07 add a tool for generating regression tests before you start hacking on the implementation of an encoding
Ignore-this: d1f08ea3737455b87aa9828b88763d1f

darcs-hash:20121203233647-76d51-a289254906f15e21b147294bfa0d5b37efde2854
2012-12-03 15:36:47 -08:00
Daniel Wagner
d8f94105ee throw -> throwException; all tests now pass
Ignore-this: 9ca8029db67a3f2a55e74b9d2f5deb90

darcs-hash:20121203225812-76d51-341b22e3205a2d5348ab7bf2df4320824b01a172
2012-12-03 14:58:12 -08:00
Daniel Wagner
789bc64b4c make an executable that runs all the tests in preparation for making possibly-breaking changes
Ignore-this: 9fcdf4cbef8c48ab63cf8852f7c34609

darcs-hash:20121128033250-76d51-fec2cd876579663f85d3b23a9bcf2dc6a469ce94
2012-11-27 19:32:50 -08:00
Daniel Wagner
93da077efb put an upper bound on the binary package until we figure out how to deal with lookAhead disappearing
Ignore-this: c8aaa1909902fcaf6a3cd3e975f0cf23

darcs-hash:20121124014801-76d51-d8767d5a4568de20b3850ce8fa4e36e370386c4d
2012-11-23 17:48:01 -08:00
Daniel Wagner
1c3ac37dfb blurgh, it's code.haskell.org not community.haskell.org
Ignore-this: 17c564f131e17631d0b07463246afd83

darcs-hash:20121017174928-76d51-e9219915e36318d42e367dd5d4fd1060e96efb44
2012-10-17 10:49:28 -07:00
Daniel Wagner
048bf2ec0c record what to do when releasing for my feeble memory
Ignore-this: 35d31f1c221ab6be43949d610a9db279

darcs-hash:20121017174555-76d51-a679a1f3270cf781ce1d826e289cad5bbce58502
2012-10-17 10:45:55 -07:00
Daniel Wagner
5c497e5dde fix some "cabal sdist" warnings
Ignore-this: 638c9a8617ce5488efe95fa8019380fd

darcs-hash:20121017172328-76d51-da695fe80b7bddb090b219dd949d8c9b87fa9bc0
2012-10-17 10:23:28 -07:00
Daniel Wagner
3f8c3bbb26 whitespace: eol marker at end of all files
Ignore-this: 3b03abece3edb25c656f84db9cef7734

darcs-hash:20121017171258-76d51-76a4e9057c0a4c3c1370485f3dc072c18caafddf
2012-10-17 10:12:58 -07:00
Daniel Wagner
e170c32ac3 update NEWS file for 0.6.7
Ignore-this: b9eb9b25a38772f87e378311de7fc98d

darcs-hash:20121017171028-76d51-8d1b9f75ecba65e76967fe19692487db7140b405
2012-10-17 10:10:28 -07:00
Daniel Wagner
c06d483ef6 provide a flag to disable the FFI function getSystemEncoding
Ignore-this: 4c6b01ddae1d86034b7d9522c188ad75
The library for querying locale isn't as easily available on Windows as it is
on Linux. This flag provides a way for people who don't need the
getSystemEncoding function to ease the build process.

darcs-hash:20121017170920-76d51-7caa5ee9897c49a46d7beeb4aca5dcedf60e3c32
2012-10-17 10:09:20 -07:00
Daniel Wagner
201eccc546 change maintainer and bump version number
Ignore-this: 9a6a1fefb3b7662c6e36d87786626e61

darcs-hash:20121017163230-76d51-a69ebceb95d8622ab4cbc534a92bbca7d40a29b7
2012-10-17 09:32:30 -07:00
Daniel Wagner
ffb37b3e2c minor cleanup: remove some commented-out code
Ignore-this: b85f525f9eaba4e475e157a541c45070

darcs-hash:20120420205823-76d51-62f8eb59cb07d75ca4f5be1a787be93746075c85
2012-04-20 13:58:23 -07:00
Daniel Wagner
a95a1e298b changes for GHC-7 and HaXml-1.22 compatibility
Ignore-this: c517f25bda6021abca5d16cf9d7d88dd

darcs-hash:20120420205714-76d51-a665d650004e98cad59fa489b97b81496848bc3b
2012-04-20 13:57:14 -07:00
Henning Guenther
44f3f083aa Make package work with base-4.3 and mtl-2
Ignore-this: 85b05556d0b7b5968d2d0340ea9daf5d

darcs-hash:20110425123827-a4fee-dd4f4c2a305d9937316b57dbe50ce154494032ac
2011-04-25 05:38:27 -07:00
36 changed files with 15541 additions and 147 deletions

22
.gitignore vendored Normal file
View File

@ -0,0 +1,22 @@
### Haskell ###
dist
dist-*
cabal-dev
*.o
*.hi
*.chi
*.chs.h
*.dyn_o
*.dyn_hi
.hpc
.hsenv
.cabal-sandbox/
cabal.sandbox.config
*.prof
*.aux
*.hp
*.eventlog
.stack-work/
cabal.project.local
.HTF/

View File

@ -1,3 +1,48 @@
Changes from 0.8 to 0.8.2
-------------------------
* Deprecated support for very old GHCs
* Updated cabal file to differentiate between build dependencies and setup dependencies
* Add upper and lower bounds to build dependencies
* Stack compatibility
Changes from 0.8 to 0.8.1
-------------------------
* Added the ShiftJIS and CP932 encodings
Changes from 0.7.0.2 to 0.8
---------------------------
* GHC-7.10/AMP compatibility
Changes from 0.7.0.1 to 0.7.0.2
-------------------------------
* Flesh out the MacOSRoman encoding, which was missing 33 code points
Changes from 0.7 to 0.7.0.1
---------------------------
* GHC-7.8 compatibility
Changes from 0.6.7 to 0.7
-------------------------
* the type of ByteSource's fetchAhead method changed to accomodate updates to the binary package
Changes from 0.6.5 to 0.6.7
---------------------------
* Skipped version 0.6.6 due to rogue upload on Hackage
* GHC-7 and HaXml-1.22 compatibility
* add -systemEncoding flag for Windows builds
Changes from 0.6.4 to 0.6.5
---------------------------
* Make package work with >=base-4.3.0.0 and mtl-2
Changes from 0.6.3 to 0.6.4
---------------------------

View File

@ -1,4 +1,4 @@
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MagicHash,BangPatterns #-}
module Data.Array.Static where
import Data.Static
@ -11,5 +11,5 @@ bounds :: Ix i => StaticArray i e -> (i,i)
bounds (StaticArray s e _) = (s,e)
(!) :: (StaticElement e,Ix i) => StaticArray i e -> i -> e
(!) (StaticArray s e addr) i = let (I# ri) = index (s,e) i
in extract addr ri
(!) (StaticArray s e addr) i = let !(I# ri) = index (s,e) i
in extract addr ri

View File

@ -9,4 +9,4 @@ buildStaticArray (s,e) els = "StaticArray ("++show s++") ("++show e++") \""
++"\"#"
buildStaticArray' :: (StaticElement e) => [e] -> String
buildStaticArray' els = buildStaticArray (0,length els-1) els
buildStaticArray' els = buildStaticArray (0,length els-1) els

View File

@ -70,4 +70,4 @@ mapMember c DeadEnd = False
mapMember c (LeafMap1 mp) = member c mp
mapMember c (LeafMap2 mp) = member c mp
mapMember c (LeafMap4 mp) = member c mp
mapMember c _ = True
mapMember c _ = True

View File

@ -61,4 +61,4 @@ buildCharMap lst = let slst = sortBy (comparing (fst.charRange)) lst
in "Node ("++show el++") ("++build' l bl (pred el)++") ("++
build' r el br++")"
in build' grps minBound maxBound
in build' grps minBound maxBound

View File

@ -76,6 +76,7 @@ import Data.Encoding.MacOSRoman
import Data.Encoding.JISX0201
import Data.Encoding.JISX0208
import Data.Encoding.ISO2022JP
import Data.Encoding.ShiftJIS
import Data.Encoding.CP437
import Data.Encoding.CP737
import Data.Encoding.CP775
@ -92,6 +93,7 @@ import Data.Encoding.CP865
import Data.Encoding.CP866
import Data.Encoding.CP869
import Data.Encoding.CP874
import Data.Encoding.CP932
import Data.Char
import Text.Regex
@ -327,6 +329,9 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
"jis_x_0208" -> Just $ DynEncoding JISX0208
-- ISO 2022-JP
"iso_2022_jp" -> Just $ DynEncoding ISO2022JP
-- Shift JIS
"shift_jis" -> Just $ DynEncoding ShiftJIS
"sjis" -> Just $ DynEncoding ShiftJIS
-- MSDOS codepages
"cp437" -> Just $ DynEncoding CP437
"cp737" -> Just $ DynEncoding CP737
@ -344,6 +349,7 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
"cp866" -> Just $ DynEncoding CP866
"cp869" -> Just $ DynEncoding CP869
"cp874" -> Just $ DynEncoding CP874
"cp932" -> Just $ DynEncoding CP932
-- defaults to nothing
_ -> Nothing
where

View File

@ -18,4 +18,4 @@ instance Encoding ASCII where
encodeChar enc c
| encodeable enc c = pushWord8 . fromIntegral . ord $ c
| otherwise = throwException . HasNoRepresentation $ c
encodeable _ c = c < '\128'
encodeable _ c = c < '\128'

View File

@ -98,4 +98,4 @@ decodeWithArray2 arr = do
then throwException $ IllegalCharacter w1
else return $ chr res
)
else throwException $ IllegalCharacter w1
else throwException $ IllegalCharacter w1

View File

@ -182,4 +182,4 @@ instance Encoding BootString where
Nothing -> punyDecode base nbase
Just ww -> throwException (IllegalCharacter ww)
Nothing -> punyDecode [] wrds
encodeable bs c = True -- XXX: hm, really?
encodeable bs c = True -- XXX: hm, really?

View File

@ -11,6 +11,7 @@ import Data.Word
import Data.Foldable (toList)
import Control.Throws
import Control.Exception.Extensible
import Control.Applicative
import Control.Monad.State
import Control.Monad.Identity
import Control.Monad.Reader
@ -80,6 +81,13 @@ instance ByteSink PutM where
newtype PutME a = PutME (Either EncodingException (PutM (),a))
instance Functor PutME where
fmap = liftM
instance Applicative PutME where
pure = return
(<*>) = ap
instance Monad PutME where
return x = PutME $ Right (return (),x)
(PutME x) >>= g = PutME $ do
@ -101,24 +109,26 @@ instance ByteSink PutME where
pushWord64be w = PutME $ Right (putWord64be w,())
pushWord64le w = PutME $ Right (putWord64le w,())
#ifndef MIN_VERSION_mtl(2,0,0,0)
#if MIN_VERSION_base(4,3,0)
#else
instance Monad (Either EncodingException) where
return x = Right x
Left err >>= g = Left err
Right x >>= g = g x
#endif
instance Throws EncodingException (State (Seq Char)) where
throwException = throw
instance ByteSink (State (Seq Char)) where
pushWord8 x = modify (|> (chr $ fromIntegral x))
instance ByteSink (StateT (Seq Char) (Either EncodingException)) where
instance (Monad m,Throws EncodingException m) => ByteSink (StateT (Seq Char) m) where
pushWord8 x = modify (|> (chr $ fromIntegral x))
newtype StrictSink a = StrictS (Ptr Word8 -> Int -> Int -> IO (a,Ptr Word8,Int,Int))
instance Functor StrictSink where
fmap = liftM
instance Applicative StrictSink where
pure = return
(<*>) = ap
instance Monad StrictSink where
return x = StrictS $ \cstr pos max -> return (x,cstr,pos,max)
(StrictS f) >>= g = StrictS (\cstr pos max -> do
@ -145,6 +155,13 @@ instance ByteSink StrictSink where
newtype StrictSinkE a = StrictSinkE (StrictSink (Either EncodingException a))
instance Functor StrictSinkE where
fmap = liftM
instance Applicative StrictSinkE where
pure = return
(<*>) = ap
instance Monad StrictSinkE where
return = StrictSinkE . return . Right
(StrictSinkE s) >>= g = StrictSinkE $ do
@ -172,6 +189,13 @@ createStrict sink = createStrictWithLen sink 32
newtype StrictSinkExplicit a = StrictSinkExplicit (StrictSink (Either EncodingException a))
instance Functor StrictSinkExplicit where
fmap = liftM
instance Applicative StrictSinkExplicit where
pure = return
(<*>) = ap
instance Monad StrictSinkExplicit where
return = (StrictSinkExplicit).return.Right
(StrictSinkExplicit sink) >>= f
@ -194,4 +218,4 @@ instance ByteSink (ReaderT Handle IO) where
pushWord8 x = do
h <- ask
liftIO $ do
hPutChar h (chr $ fromIntegral x)
hPutChar h (chr $ fromIntegral x)

View File

@ -6,7 +6,9 @@ import Data.Encoding.Exception
import Data.Bits
import Data.Binary.Get
import Data.Char
import Data.Maybe
import Data.Word
import Control.Applicative as A
import Control.Monad.State
import Control.Monad.Identity
import Control.Monad.Reader
@ -19,7 +21,9 @@ import System.IO
class (Monad m,Throws DecodingException m) => ByteSource m where
sourceEmpty :: m Bool
fetchWord8 :: m Word8
fetchAhead :: m a -> m a
-- 'fetchAhead act' should return the same thing 'act' does, but should
-- only consume input if 'act' returns a 'Just' value
fetchAhead :: m (Maybe a) -> m (Maybe a)
fetchWord16be :: m Word16
fetchWord16be = do
w1 <- fetchWord8
@ -95,7 +99,20 @@ instance Throws DecodingException Get where
instance ByteSource Get where
sourceEmpty = isEmpty
fetchWord8 = getWord8
fetchAhead = lookAhead
#if MIN_VERSION_binary(0,6,0)
fetchAhead act = (do
res <- act
case res of
Nothing -> A.empty
Just a -> return res
) <|> return Nothing
#else
fetchAhead act = do
res <- lookAhead act
case res of
Nothing -> return Nothing
Just a -> act
#endif
fetchWord16be = getWord16be
fetchWord16le = getWord16le
fetchWord32be = getWord32be
@ -103,25 +120,25 @@ instance ByteSource Get where
fetchWord64be = getWord64be
fetchWord64le = getWord64le
instance Throws DecodingException (State [Char]) where
throwException = throw
fetchAheadState act = do
chs <- get
res <- act
when (isNothing res) (put chs)
return res
instance ByteSource (State [Char]) where
instance ByteSource (StateT [Char] Identity) where
sourceEmpty = gets null
fetchWord8 = do
chs <- get
case chs of
[] -> throw UnexpectedEnd
[] -> throwException UnexpectedEnd
c:cs -> do
put cs
return (fromIntegral $ ord c)
fetchAhead act = do
chs <- get
res <- act
put chs
return res
fetchAhead = fetchAheadState
#ifndef MIN_VERSION_mtl(2,0,0,0)
#if MIN_VERSION_base(4,3,0)
#else
instance Monad (Either DecodingException) where
return = Right
(Left err) >>= g = Left err
@ -137,47 +154,21 @@ instance ByteSource (StateT [Char] (Either DecodingException)) where
c:cs -> do
put cs
return (fromIntegral $ ord c)
fetchAhead act = do
chs <- get
res <- act
put chs
return res
fetchAhead = fetchAheadState
instance Throws DecodingException (State BS.ByteString) where
throwException = throw
instance ByteSource (State BS.ByteString) where
sourceEmpty = gets BS.null
fetchWord8 = State (\str -> case BS.uncons str of
Nothing -> throw UnexpectedEnd
Just (c,cs) -> (c,cs))
fetchAhead act = do
str <- get
res <- act
put str
return res
instance ByteSource (StateT BS.ByteString (Either DecodingException)) where
instance (Monad m,Throws DecodingException m) => ByteSource (StateT BS.ByteString m) where
sourceEmpty = gets BS.null
fetchWord8 = StateT (\str -> case BS.uncons str of
Nothing -> Left UnexpectedEnd
Just ns -> Right ns)
fetchAhead act = do
chs <- get
res <- act
put chs
return res
Nothing -> throwException UnexpectedEnd
Just (c,cs) -> return (c,cs))
fetchAhead = fetchAheadState
instance ByteSource (StateT LBS.ByteString (Either DecodingException)) where
sourceEmpty = gets LBS.null
fetchWord8 = StateT (\str -> case LBS.uncons str of
Nothing -> Left UnexpectedEnd
Just ns -> Right ns)
fetchAhead act = do
chs <- get
res <- act
put chs
return res
fetchAhead = fetchAheadState
instance ByteSource (ReaderT Handle IO) where
sourceEmpty = do
@ -192,5 +183,5 @@ instance ByteSource (ReaderT Handle IO) where
h <- ask
pos <- liftIO $ hGetPosn h
res <- act
liftIO $ hSetPosn pos
return res
when (isNothing res) (liftIO $ hSetPosn pos)
return res

7941
Data/Encoding/CP932.xml Normal file

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,7 @@ import Control.Monad.Identity
data EncodingException
= HasNoRepresentation Char -- ^ Thrown if a specific character
-- is not representable in an encoding.
deriving (Eq,Show,Typeable)
deriving (Eq,Ord,Show,Read,Typeable)
instance Exception EncodingException
@ -25,6 +25,6 @@ data DecodingException
| OutOfRange -- ^ the decoded value was out of the unicode range
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
-- character, but is illegal.
deriving (Eq,Show,Typeable)
deriving (Eq,Ord,Show,Read,Typeable)
instance Exception DecodingException

View File

@ -26,11 +26,10 @@ instance Encoding ISO2022JP where
encodeable _ c = encodeable ASCII c || encodeable JISX0201 c || encodeable JISX0208 c
instance ISO2022 ISO2022JP where
readEscape _ = do
w <- fetchAhead fetchWord8
readEscape _ = fetchAhead $ do
w <- fetchWord8
if w == 27
then (do
fetchWord8
w2 <- fetchWord8
w3 <- fetchWord8
case w2 of
@ -49,4 +48,4 @@ instance ISO2022 ISO2022JP where
| encodeable ASCII c = Just (DynEncoding ASCII,[27,40,66])
| encodeable JISX0201 c = Just (DynEncoding JISX0201,[27,40,74])
| encodeable JISX0208 c = Just (DynEncoding JISX0208,[27,36,66])
| otherwise = Nothing
| otherwise = Nothing

View File

@ -20,4 +20,4 @@ instance Encoding ISO88591 where
decodeChar _ = do
w <- fetchWord8
return (chr $ fromIntegral w)
encodeable _ c = c <= '\255'
encodeable _ c = c <= '\255'

View File

@ -57,4 +57,4 @@ instance Encoding KOI8R where
| otherwise = case lookup ch koi8rMap of
Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch)
encodeable _ c = member c koi8rMap
encodeable _ c = member c koi8rMap

View File

@ -57,4 +57,4 @@ instance Encoding KOI8U where
| otherwise = case lookup ch koi8uMap of
Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch)
encodeable _ c = member c koi8uMap
encodeable _ c = member c koi8uMap

View File

@ -61,6 +61,38 @@
# * Change mapping of 0xBD from U+2126 to its canonical
# decomposition, U+03A9.
0x00 0x0000 # NULL
0x01 0x0001 # START OF HEADING
0x02 0x0002 # START OF TEXT
0x03 0x0003 # END OF TEXT
0x04 0x0004 # END OF TRANSMISSION
0x05 0x0005 # ENQUIRY
0x06 0x0006 # ACKNOWLEDGE
0x07 0x0007 # BELL
0x08 0x0008 # BACKSPACE
0x09 0x0009 # HORIZONTAL TABULATION
0x0A 0x000A # LINE FEED
0x0B 0x000B # VERTICAL TABULATION
0x0C 0x000C # FORM FEED
0x0D 0x000D # CARRIAGE RETURN
0x0E 0x000E # SHIFT OUT
0x0F 0x000F # SHIFT IN
0x10 0x0010 # DATA LINK ESCAPE
0x11 0x0011 # DEVICE CONTROL ONE
0x12 0x0012 # DEVICE CONTROL TWO
0x13 0x0013 # DEVICE CONTROL THREE
0x14 0x0014 # DEVICE CONTROL FOUR
0x15 0x0015 # NEGATIVE ACKNOWLEDGE
0x16 0x0016 # SYNCHRONOUS IDLE
0x17 0x0017 # END OF TRANSMISSION BLOCK
0x18 0x0018 # CANCEL
0x19 0x0019 # END OF MEDIUM
0x1A 0x001A # SUBSTITUTE
0x1B 0x001B # ESCAPE
0x1C 0x001C # FILE SEPARATOR
0x1D 0x001D # GROUP SEPARATOR
0x1E 0x001E # RECORD SEPARATOR
0x1F 0x001F # UNIT SEPARATOR
0x20 0x0020 # SPACE
0x21 0x0021 # EXCLAMATION MARK
0x22 0x0022 # QUOTATION MARK
@ -156,7 +188,7 @@
0x7C 0x007C # VERTICAL LINE
0x7D 0x007D # RIGHT CURLY BRACKET
0x7E 0x007E # TILDE
#
0x7F 0x007F # DELETE
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA

View File

@ -157,4 +157,4 @@ preprocessMapping tp src trg mods name = do
," Just c -> return c"
," encodeChar _ c = mapEncode c "++mpname
," encodeable _ c = mapMember c "++mpname
]
]

View File

@ -8,6 +8,7 @@ import Data.List (find)
import Data.Char
import Text.XML.HaXml.XmlContent
import Text.XML.HaXml.OneOfN
import Text.XML.HaXml.Types
testFile :: FilePath -> IO CharacterMapping
testFile fp = fReadXml fp
@ -130,7 +131,7 @@ instance HTypeable CharacterMapping where
toHType x = Defined "characterMapping" [] []
instance XmlContent CharacterMapping where
toContents (CharacterMapping as a b c) =
[CElem (Elem "characterMapping" (toAttrs as) (maybe [] toContents a
[CElem (Elem (N "characterMapping") (toAttrs as) (maybe [] toContents a
++ toContents b
++ toContents c)) ()]
parseContents = do
@ -167,29 +168,29 @@ instance XmlAttributes CharacterMapping_Attrs where
instance XmlAttrType CharacterMapping_bidiOrder where
fromAttrToTyp n (n',v)
| n==n' = translate (attr2str v)
| N n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "logical" = Just CharacterMapping_bidiOrder_logical
translate "RTL" = Just CharacterMapping_bidiOrder_RTL
translate "LTR" = Just CharacterMapping_bidiOrder_LTR
translate _ = Nothing
toAttrFrTyp n CharacterMapping_bidiOrder_logical = Just (n, str2attr "logical")
toAttrFrTyp n CharacterMapping_bidiOrder_RTL = Just (n, str2attr "RTL")
toAttrFrTyp n CharacterMapping_bidiOrder_LTR = Just (n, str2attr "LTR")
toAttrFrTyp n CharacterMapping_bidiOrder_logical = Just (N n, str2attr "logical")
toAttrFrTyp n CharacterMapping_bidiOrder_RTL = Just (N n, str2attr "RTL")
toAttrFrTyp n CharacterMapping_bidiOrder_LTR = Just (N n, str2attr "LTR")
instance XmlAttrType CharacterMapping_combiningOrder where
fromAttrToTyp n (n',v)
| n==n' = translate (attr2str v)
| N n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "before" = Just CharacterMapping_combiningOrder_before
translate "after" = Just CharacterMapping_combiningOrder_after
translate _ = Nothing
toAttrFrTyp n CharacterMapping_combiningOrder_before = Just (n, str2attr "before")
toAttrFrTyp n CharacterMapping_combiningOrder_after = Just (n, str2attr "after")
toAttrFrTyp n CharacterMapping_combiningOrder_before = Just (N n, str2attr "before")
toAttrFrTyp n CharacterMapping_combiningOrder_after = Just (N n, str2attr "after")
instance XmlAttrType CharacterMapping_normalization where
fromAttrToTyp n (n',v)
| n==n' = translate (attr2str v)
| N n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "undetermined" = Just CharacterMapping_normalization_undetermined
translate "neither" = Just CharacterMapping_normalization_neither
@ -197,17 +198,17 @@ instance XmlAttrType CharacterMapping_normalization where
translate "NFD" = Just CharacterMapping_normalization_NFD
translate "NFC_NFD" = Just CharacterMapping_normalization_NFC_NFD
translate _ = Nothing
toAttrFrTyp n CharacterMapping_normalization_undetermined = Just (n, str2attr "undetermined")
toAttrFrTyp n CharacterMapping_normalization_neither = Just (n, str2attr "neither")
toAttrFrTyp n CharacterMapping_normalization_NFC = Just (n, str2attr "NFC")
toAttrFrTyp n CharacterMapping_normalization_NFD = Just (n, str2attr "NFD")
toAttrFrTyp n CharacterMapping_normalization_NFC_NFD = Just (n, str2attr "NFC_NFD")
toAttrFrTyp n CharacterMapping_normalization_undetermined = Just (N n, str2attr "undetermined")
toAttrFrTyp n CharacterMapping_normalization_neither = Just (N n, str2attr "neither")
toAttrFrTyp n CharacterMapping_normalization_NFC = Just (N n, str2attr "NFC")
toAttrFrTyp n CharacterMapping_normalization_NFD = Just (N n, str2attr "NFD")
toAttrFrTyp n CharacterMapping_normalization_NFC_NFD = Just (N n, str2attr "NFC_NFD")
instance XmlAttrType ByteSequence where
fromAttrToTyp n (n',v)
| n==n' = parseByteSequence (attr2str v)
| N n==n' = parseByteSequence (attr2str v)
| otherwise = Nothing
toAttrFrTyp n bs = Just (n, str2attr $ show bs)
toAttrFrTyp n bs = Just (N n, str2attr $ show bs)
parseByteSequence :: String -> Maybe ByteSequence
parseByteSequence str = do
@ -222,9 +223,9 @@ instance Show ByteSequence where
instance XmlAttrType CodePoints where
fromAttrToTyp n (n',v)
| n==n' = parseCodePoints (attr2str v)
| N n==n' = parseCodePoints (attr2str v)
| otherwise = Nothing
toAttrFrTyp n bs = Just (n, str2attr $ show bs)
toAttrFrTyp n bs = Just (N n, str2attr $ show bs)
parseCodePoints :: String -> Maybe CodePoints
parseCodePoints str = do
@ -241,7 +242,7 @@ instance HTypeable Stateful_siso where
toHType x = Defined "stateful_siso" [] []
instance XmlContent Stateful_siso where
toContents (Stateful_siso a b) =
[CElem (Elem "stateful_siso" [] (toContents a ++ toContents b)) ()]
[CElem (Elem (N "stateful_siso") [] (toContents a ++ toContents b)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["stateful_siso"]
; interior e $ return (Stateful_siso) `apply` parseContents
@ -252,7 +253,7 @@ instance HTypeable History where
toHType x = Defined "history" [] []
instance XmlContent History where
toContents (History a) =
[CElem (Elem "history" [] (toContents a)) ()]
[CElem (Elem (N "history") [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["history"]
; interior e $ return (History) `apply` parseContents
@ -262,7 +263,7 @@ instance HTypeable Modified where
toHType x = Defined "modified" [] []
instance XmlContent Modified where
toContents (Modified as a) =
[CElem (Elem "modified" (toAttrs as) (toText a)) ()]
[CElem (Elem (N "modified") (toAttrs as) (toText a)) ()]
parseContents = do
{ e@(Elem _ as _) <- element ["modified"]
; interior e $ return (Modified (fromAttrs as))
@ -283,7 +284,7 @@ instance HTypeable Validity where
toHType x = Defined "validity" [] []
instance XmlContent Validity where
toContents (Validity a) =
[CElem (Elem "validity" [] (toContents a)) ()]
[CElem (Elem (N "validity") [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["validity"]
; interior e $ return (Validity) `apply` parseContents
@ -293,7 +294,7 @@ instance HTypeable State where
toHType x = Defined "state" [] []
instance XmlContent State where
toContents as =
[CElem (Elem "state" (toAttrs as) []) ()]
[CElem (Elem (N "state") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["state"]
; return (fromAttrs as)
@ -319,7 +320,7 @@ instance HTypeable Assignments where
toHType x = Defined "assignments" [] []
instance XmlContent Assignments where
toContents (Assignments as a b c d e) =
[CElem (Elem "assignments" (toAttrs as) (concatMap toContents a ++
[CElem (Elem (N "assignments") (toAttrs as) (concatMap toContents a ++
concatMap toContents b ++ concatMap toContents c ++
concatMap toContents d ++
concatMap toContents e)) ()]
@ -345,7 +346,7 @@ instance HTypeable A where
toHType x = Defined "a" [] []
instance XmlContent A where
toContents as =
[CElem (Elem "a" (toAttrs as) []) ()]
[CElem (Elem (N "a") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["a"]
; return (fromAttrs as)
@ -368,7 +369,7 @@ instance HTypeable Fub where
toHType x = Defined "fub" [] []
instance XmlContent Fub where
toContents as =
[CElem (Elem "fub" (toAttrs as) []) ()]
[CElem (Elem (N "fub") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["fub"]
; return (fromAttrs as)
@ -396,7 +397,7 @@ instance HTypeable Fbu where
toHType x = Defined "fbu" [] []
instance XmlContent Fbu where
toContents as =
[CElem (Elem "fbu" (toAttrs as) []) ()]
[CElem (Elem (N "fbu") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["fbu"]
; return (fromAttrs as)
@ -418,7 +419,7 @@ instance HTypeable Sub1 where
toHType x = Defined "sub1" [] []
instance XmlContent Sub1 where
toContents as =
[CElem (Elem "sub1" (toAttrs as) []) ()]
[CElem (Elem (N "sub1") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["sub1"]
; return (fromAttrs as)
@ -440,7 +441,7 @@ instance HTypeable Range where
toHType x = Defined "range" [] []
instance XmlContent Range where
toContents as =
[CElem (Elem "range" (toAttrs as) []) ()]
[CElem (Elem (N "range") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["range"]
; return (fromAttrs as)
@ -470,7 +471,7 @@ instance HTypeable Iso2022 where
toHType x = Defined "iso2022" [] []
instance XmlContent Iso2022 where
toContents (Iso2022 a b) =
[CElem (Elem "iso2022" [] (maybe [] toContents a ++
[CElem (Elem (N "iso2022") [] (maybe [] toContents a ++
toContents b)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["iso2022"]
@ -482,7 +483,7 @@ instance HTypeable Default2022 where
toHType x = Defined "default2022" [] []
instance XmlContent Default2022 where
toContents as =
[CElem (Elem "default2022" (toAttrs as) []) ()]
[CElem (Elem (N "default2022") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["default2022"]
; return (fromAttrs as)
@ -500,7 +501,7 @@ instance HTypeable Escape where
toHType x = Defined "escape" [] []
instance XmlContent Escape where
toContents as =
[CElem (Elem "escape" (toAttrs as) []) ()]
[CElem (Elem (N "escape") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["escape"]
; return (fromAttrs as)
@ -520,7 +521,7 @@ instance HTypeable Si where
toHType x = Defined "si" [] []
instance XmlContent Si where
toContents (Si a) =
[CElem (Elem "si" [] (toContents a)) ()]
[CElem (Elem (N "si") [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["si"]
; interior e $ return (Si) `apply` parseContents
@ -530,7 +531,7 @@ instance HTypeable So where
toHType x = Defined "so" [] []
instance XmlContent So where
toContents (So a) =
[CElem (Elem "so" [] (toContents a)) ()]
[CElem (Elem (N "so") [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["so"]
; interior e $ return (So) `apply` parseContents
@ -540,7 +541,7 @@ instance HTypeable Ss2 where
toHType x = Defined "ss2" [] []
instance XmlContent Ss2 where
toContents (Ss2 a) =
[CElem (Elem "ss2" [] (toContents a)) ()]
[CElem (Elem (N "ss2") [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["ss2"]
; interior e $ return (Ss2) `apply` parseContents
@ -550,7 +551,7 @@ instance HTypeable Ss3 where
toHType x = Defined "ss3" [] []
instance XmlContent Ss3 where
toContents (Ss3 a) =
[CElem (Elem "ss3" [] (toContents a)) ()]
[CElem (Elem (N "ss3") [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["ss3"]
; interior e $ return (Ss3) `apply` parseContents
@ -560,7 +561,7 @@ instance HTypeable Designator where
toHType x = Defined "designator" [] []
instance XmlContent Designator where
toContents as =
[CElem (Elem "designator" (toAttrs as) []) ()]
[CElem (Elem (N "designator") (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["designator"]
; return (fromAttrs as)

View File

@ -313,4 +313,4 @@ buildGroups ((c,bs):rest) = (EncodingGroup c end (bs:wrds)):buildGroups oth
group n all@((c,bs):rest)
| succ n == c = let (e,res,oth) = group c rest
in (e,bs:res,oth)
| otherwise = (n,[],all)
| otherwise = (n,[],all)

7093
Data/Encoding/ShiftJIS.xml Normal file

File diff suppressed because it is too large Load Diff

View File

@ -79,4 +79,4 @@ instance Encoding UTF16 where
return (c:cs)
Right bom -> decode bom
decode enc = untilM sourceEmpty (decodeChar enc)
encodeable _ c = (c > '\xDFFF' && c <= '\x10FFFF') || c < '\xD800'
encodeable _ c = (c > '\xDFFF' && c <= '\x10FFFF') || c < '\xD800'

View File

@ -44,4 +44,4 @@ instance Encoding UTF32 where
rest <- untilM sourceEmpty (decodeChar UTF32)
return ((chr $ fromIntegral ch):rest)
decode enc = untilM sourceEmpty (decodeChar enc)
encodeable _ _ = True
encodeable _ _ = True

View File

@ -25,4 +25,4 @@ member ind (StaticMap idx _) = lookup' 1
else case compare ind (idx!n) of
LT -> lookup' (n * 2)
GT -> lookup' ((n * 2) + 1)
EQ -> True
EQ -> True

View File

@ -1,4 +1,4 @@
{-# LANGUAGE MagicHash,FlexibleInstances #-}
{-# LANGUAGE MagicHash,FlexibleInstances,BangPatterns,CPP #-}
module Data.Static where
import GHC.Exts
@ -36,7 +36,11 @@ instance StaticElement Char where
instance StaticElement (Maybe Char) where
extract addr i = let !v = indexWord32OffAddr# addr i
#if __GLASGOW_HASKELL__ >= 708
in if isTrue# (eqWord# v (int2Word# 4294967295#)) -- -1 in Word32
#else
in if eqWord# v (int2Word# 4294967295#) -- -1 in Word32
#endif
then Nothing
else (if (I# (word2Int# v)) > 0x10FFFF
then error (show (I# (word2Int# v))++" is not a valid char ("++show (I# i)++")")
@ -64,4 +68,4 @@ instance StaticElement a => StaticElement (a,a,a,a) where
x3 = extract addr (i *# 4# +# 2#)
x4 = extract addr (i *# 4# +# 3#)
in (x1,x2,x3,x4)
gen (x1,x2,x3,x4) = gen x1 ++ gen x2 ++ gen x3 ++ gen x4
gen (x1,x2,x3,x4) = gen x1 ++ gen x2 ++ gen x3 ++ gen x4

27
LICENSE Normal file
View File

@ -0,0 +1,27 @@
Copyright (c) Daniel Wagner
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the author nor the names of his contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
!! This software is provided by the regents and contributors ``as is'' and
!! any express or implied warranties, including, but not limited to, the
!! implied warranties of merchantability and fitness for a particular purpose
!! are disclaimed. In no event shall the authors or contributors be liable
!! for any direct, indirect, incidental, special, exemplary, or consequential
!! damages (including, but not limited to, procurement of substitute goods
!! or services; loss of use, data, or profits; or business interruption)
!! however caused and on any theory of liability, whether in contract, strict
!! liability, or tort (including negligence or otherwise) arising in any way
!! out of the use of this software, even if advised of the possibility of
!! such damage.

14
RELEASING Normal file
View File

@ -0,0 +1,14 @@
On each release:
* update CHANGELOG
* bump the version number in the .cabal file (including in the "this" repository spec)
* cabal upload a release tarball
* darcs tag with the version number
To build a release tarball:
cabal configure
./dist/setup/setup sdist
tar xf dist/encoding-version.tar.gz
rm -r encoding-version/dist
tar --format=ustar -czf dist/encoding-version.tar.gz encoding-version
rm -r encoding-version
cabal upload dist/encoding-version.tar

View File

@ -131,12 +131,19 @@ interact f = do
line <- hGetLine stdin
hPutStrLn stdout (f line)
#ifdef SYSTEM_ENCODING
foreign import ccall "system_encoding.h get_system_encoding"
get_system_encoding :: IO CString
#endif
-- | Returns the encoding used on the current system.
-- | Returns the encoding used on the current system. Currently only supported
-- on Linux-alikes.
getSystemEncoding :: IO DynEncoding
getSystemEncoding = do
#ifdef SYSTEM_ENCODING
enc <- get_system_encoding
str <- peekCString enc
return $ encodingFromString str
return $ encodingFromString str
#else
error "getSystemEncoding is not supported on this platform"
#endif

View File

@ -1,37 +1,59 @@
Name: encoding
Version: 0.6.4
Version: 0.8.2
Author: Henning Günther
Maintainer: h.guenther@tu-bs.de
Maintainer: daniel@wagner-home.com
License: BSD3
License-File: LICENSE
Synopsis: A library for various character encodings
Description:
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunatly, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Category: Codec
Homepage: http://code.haskell.org/encoding/
Cabal-Version: >=1.2
Cabal-Version: >=1.8
Build-Type: Custom
Extra-Source-Files:
NEWS
CHANGELOG
Data/Encoding/Preprocessor/Mapping.hs
Data/Encoding/Preprocessor/XMLMapping.hs
Data/Encoding/Preprocessor/XMLMappingBuilder.hs
Data/CharMap/Builder.hs
Data/Array/Static/Builder.hs
Data/Map/Static/Builder.hs
system_encoding.h
system_encoding.c
Flag splitBase
description: Choose the new smaller, split-up base package.
Flag newGHC
description: Use ghc version > 6.10
Flag systemEncoding
description: Provide the getSystemEncoding action to query the locale.
Source-Repository head
Type: git
Location: http://github.com/dmwit/encoding
Source-Repository this
Type: git
Location: http://github.com/dmwit/encoding
Tag: 0.8.2
Custom-Setup
Setup-Depends: base >=3 && <5,
Cabal >=1.24 && <1.25,
containers,
filepath,
ghc-prim,
HaXml >=1.22 && <1.26
Library
if flag(splitBase)
if flag(newGHC)
Build-Depends: bytestring, base >= 3 && < 5, binary, mtl, containers, extensible-exceptions, array, regex-compat, ghc-prim, ghc >= 6.10, HaXml >= 1.19
else
Build-Depends: bytestring, base >= 3 && < 5, binary, mtl, containers, extensible-exceptions, array, regex-compat, ghc < 6.10, HaXml >= 1.19
else
Build-Depends: base < 3, binary, extensible-exceptions, HaXml >= 1.19
Build-Depends: array >=0.4 && <0.6,
base >=4 && <5,
binary >=0.7 && <0.10,
bytestring >=0.9 && <0.11,
containers >=0.4 && <0.6,
extensible-exceptions >=0.1 && <0.2,
ghc-prim >=0.3 && <0.6,
mtl >=2.0 && <2.3,
regex-compat >=0.71 && <0.95
Extensions: CPP
Exposed-Modules:
Data.Encoding
@ -77,6 +99,7 @@ Library
Data.Encoding.JISX0212
Data.Encoding.ISO2022
Data.Encoding.ISO2022JP
Data.Encoding.ShiftJIS
Data.Encoding.CP437
Data.Encoding.CP737
Data.Encoding.CP775
@ -93,6 +116,7 @@ Library
Data.Encoding.CP866
Data.Encoding.CP869
Data.Encoding.CP874
Data.Encoding.CP932
System.IO.Encoding
Other-Modules:
Data.Encoding.Base
@ -100,9 +124,26 @@ Library
Data.Map.Static
Data.Static
Data.CharMap
Includes:
system_encoding.h
Install-Includes:
system_encoding.h
C-Sources:
system_encoding.c
if impl(ghc >= 7.10)
GHC-Options: -fno-warn-tabs
if flag(systemEncoding)
Includes:
system_encoding.h
Install-Includes:
system_encoding.h
C-Sources:
system_encoding.c
CPP-Options: -DSYSTEM_ENCODING
test-suite encoding-test
type: exitcode-stdio-1.0
hs-source-dirs: tests
main-is: Main.hs
other-modules: Test.Tester
, Test.Tests
build-depends: base
, bytestring
, encoding
, HUnit
, QuickCheck
ghc-options: -threaded -rtsopts -with-rtsopts=-N

66
stack.yaml Normal file
View File

@ -0,0 +1,66 @@
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
# resolver: ghcjs-0.1.0_ghc-7.10.2
# resolver:
# name: custom-snapshot
# location: "./custom-snapshot.yaml"
resolver: lts-8.22
# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# extra-dep: true
# subdirs:
# - auto-update
# - wai
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
packages:
- '.'
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps: []
# Override default flag values for local packages and extra-deps
flags: {}
# Extra package databases containing global packages
extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.4"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor

18
tests/Main.hs Normal file
View File

@ -0,0 +1,18 @@
import Control.Monad
import Test.HUnit
import Test.Tests
hunitTests =
[ ("utf8Tests", utf8Tests)
, ("utf16Tests", utf16Tests)
, ("punycodeTests", punycodeTests)
, ("isoTests", isoTests)
, ("jisTests", jisTests)
, ("gb18030Tests", gb18030Tests)
]
main = do
identityTests
forM_ hunitTests $ \(name, test) -> do
putStrLn $ "running " ++ name
runTestTT test >>= print

66
tests/RegressionTest.hs Normal file
View File

@ -0,0 +1,66 @@
{-# LANGUAGE NoMonomorphismRestriction #-}
import Control.Applicative
import Control.Monad
import Data.ByteString (ByteString, pack)
import Data.Encoding
import Data.Word
import System.Random
-- for example:
import Data.Encoding.ISO2022JP
--main = generate ISO2022JP
main = test ISO2022JP
-- BEWARE! These things are _very_ memory-hungry if you don't compile with optimizations.
-- end example
randomRepeat f max = randomRIO (0, max) >>= flip replicateM f
randomString = randomRepeat randomIO
randomGoodString = randomRepeat . randomGoodChar
randomGoodChar f = let
good = filter f [minBound..maxBound]
n = length good
in do
i <- randomRIO (0, n-1)
return (good !! i)
generate enc = do
let filename = show enc ++ ".regression"
randomGood = randomGoodString (encodeable enc)
shortMixedEnc <- replicateM 300 ( randomString 10)
shortGoodEnc <- replicateM 30 ( randomGood 10)
longMixedEnc <- replicateM 300 ( randomString 1000)
longGoodEnc <- replicateM 3000 ( randomGood 1000)
shortDec <- replicateM 300 (pack <$> randomString 10)
longDec <- replicateM 3000 (pack <$> randomString 1000)
writeFile filename (show
[ (s, encodeStrictByteStringExplicit enc s)
| ss <- [shortMixedEnc, shortGoodEnc, longMixedEnc, longGoodEnc]
, s <- ss
] ++ "\n")
appendFile filename (show
[ (bs, decodeStrictByteStringExplicit enc bs)
| bss <- [shortDec, longDec]
, bs <- bss
] ++ "\n")
complain action input expected actual = when (expected /= actual) . putStrLn . concat $
[ "when "
, pad action
, show input
, "\n"
, pad "expected"
, show expected
, "\n"
, pad "but got"
, show actual
]
where
size = maximum . map length $ [action, "expected", "but got"]
pad s = s ++ replicate (size - length s) ' ' ++ ": "
test enc = do
[encoded_, decoded_] <- lines <$> readFile (show enc ++ ".regression")
let encoded = read encoded_
decoded = read decoded_
forM_ encoded $ \(s , correctEncoding) -> complain "encoding" s correctEncoding (encodeStrictByteStringExplicit enc s)
forM_ decoded $ \(bs, correctDecoding) -> complain "decoding" bs correctDecoding (decodeStrictByteStringExplicit enc bs)

View File

@ -53,9 +53,6 @@ charGen = let
threeByte = choose (0x010000,0x10FFFF) >>= return.chr
in frequency [(40,ascii),(30,oneByte),(20,twoByte),(10,threeByte)]
instance Arbitrary Word8 where
arbitrary = choose (0x00,0xFF::Int) >>= return.fromIntegral
quickCheckEncoding :: Encoding enc => enc -> IO ()
quickCheckEncoding e = do
quickCheck (encodingIdentity e)
@ -79,4 +76,4 @@ decodingIdentity e wrd
Right res' -> property (bstr==res')
where
bstr = BS.pack wrd
decoded = decodeStrictByteStringExplicit e bstr
decoded = decodeStrictByteStringExplicit e bstr

View File

@ -282,4 +282,4 @@ gb18030Tests = TestList $ map test $
{-big5Tests :: Test
big5Tests = test (EncodingFileTest BIG5 "data/BIG5" "data/BIG5.UTF-8")-}