Compare commits

..

No commits in common. "master" and "0.6.4" have entirely different histories.

36 changed files with 147 additions and 15541 deletions

22
.gitignore vendored
View File

@ -1,22 +0,0 @@
### Haskell ###
dist
dist-*
cabal-dev
*.o
*.hi
*.chi
*.chs.h
*.dyn_o
*.dyn_hi
.hpc
.hsenv
.cabal-sandbox/
cabal.sandbox.config
*.prof
*.aux
*.hp
*.eventlog
.stack-work/
cabal.project.local
.HTF/

View File

@ -1,4 +1,4 @@
{-# LANGUAGE MagicHash,BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.Array.Static where
import Data.Static
@ -11,5 +11,5 @@ bounds :: Ix i => StaticArray i e -> (i,i)
bounds (StaticArray s e _) = (s,e)
(!) :: (StaticElement e,Ix i) => StaticArray i e -> i -> e
(!) (StaticArray s e addr) i = let !(I# ri) = index (s,e) i
in extract addr ri
(!) (StaticArray s e addr) i = let (I# ri) = index (s,e) i
in extract addr ri

View File

@ -9,4 +9,4 @@ buildStaticArray (s,e) els = "StaticArray ("++show s++") ("++show e++") \""
++"\"#"
buildStaticArray' :: (StaticElement e) => [e] -> String
buildStaticArray' els = buildStaticArray (0,length els-1) els
buildStaticArray' els = buildStaticArray (0,length els-1) els

View File

@ -70,4 +70,4 @@ mapMember c DeadEnd = False
mapMember c (LeafMap1 mp) = member c mp
mapMember c (LeafMap2 mp) = member c mp
mapMember c (LeafMap4 mp) = member c mp
mapMember c _ = True
mapMember c _ = True

View File

@ -61,4 +61,4 @@ buildCharMap lst = let slst = sortBy (comparing (fst.charRange)) lst
in "Node ("++show el++") ("++build' l bl (pred el)++") ("++
build' r el br++")"
in build' grps minBound maxBound
in build' grps minBound maxBound

View File

@ -76,7 +76,6 @@ import Data.Encoding.MacOSRoman
import Data.Encoding.JISX0201
import Data.Encoding.JISX0208
import Data.Encoding.ISO2022JP
import Data.Encoding.ShiftJIS
import Data.Encoding.CP437
import Data.Encoding.CP737
import Data.Encoding.CP775
@ -93,7 +92,6 @@ import Data.Encoding.CP865
import Data.Encoding.CP866
import Data.Encoding.CP869
import Data.Encoding.CP874
import Data.Encoding.CP932
import Data.Char
import Text.Regex
@ -329,9 +327,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
"jis_x_0208" -> Just $ DynEncoding JISX0208
-- ISO 2022-JP
"iso_2022_jp" -> Just $ DynEncoding ISO2022JP
-- Shift JIS
"shift_jis" -> Just $ DynEncoding ShiftJIS
"sjis" -> Just $ DynEncoding ShiftJIS
-- MSDOS codepages
"cp437" -> Just $ DynEncoding CP437
"cp737" -> Just $ DynEncoding CP737
@ -349,7 +344,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
"cp866" -> Just $ DynEncoding CP866
"cp869" -> Just $ DynEncoding CP869
"cp874" -> Just $ DynEncoding CP874
"cp932" -> Just $ DynEncoding CP932
-- defaults to nothing
_ -> Nothing
where

View File

@ -18,4 +18,4 @@ instance Encoding ASCII where
encodeChar enc c
| encodeable enc c = pushWord8 . fromIntegral . ord $ c
| otherwise = throwException . HasNoRepresentation $ c
encodeable _ c = c < '\128'
encodeable _ c = c < '\128'

View File

@ -98,4 +98,4 @@ decodeWithArray2 arr = do
then throwException $ IllegalCharacter w1
else return $ chr res
)
else throwException $ IllegalCharacter w1
else throwException $ IllegalCharacter w1

View File

@ -182,4 +182,4 @@ instance Encoding BootString where
Nothing -> punyDecode base nbase
Just ww -> throwException (IllegalCharacter ww)
Nothing -> punyDecode [] wrds
encodeable bs c = True -- XXX: hm, really?
encodeable bs c = True -- XXX: hm, really?

View File

@ -11,7 +11,6 @@ import Data.Word
import Data.Foldable (toList)
import Control.Throws
import Control.Exception.Extensible
import Control.Applicative
import Control.Monad.State
import Control.Monad.Identity
import Control.Monad.Reader
@ -81,13 +80,6 @@ instance ByteSink PutM where
newtype PutME a = PutME (Either EncodingException (PutM (),a))
instance Functor PutME where
fmap = liftM
instance Applicative PutME where
pure = return
(<*>) = ap
instance Monad PutME where
return x = PutME $ Right (return (),x)
(PutME x) >>= g = PutME $ do
@ -109,26 +101,24 @@ instance ByteSink PutME where
pushWord64be w = PutME $ Right (putWord64be w,())
pushWord64le w = PutME $ Right (putWord64le w,())
#if MIN_VERSION_base(4,3,0)
#else
#ifndef MIN_VERSION_mtl(2,0,0,0)
instance Monad (Either EncodingException) where
return x = Right x
Left err >>= g = Left err
Right x >>= g = g x
#endif
instance (Monad m,Throws EncodingException m) => ByteSink (StateT (Seq Char) m) where
instance Throws EncodingException (State (Seq Char)) where
throwException = throw
instance ByteSink (State (Seq Char)) where
pushWord8 x = modify (|> (chr $ fromIntegral x))
instance ByteSink (StateT (Seq Char) (Either EncodingException)) where
pushWord8 x = modify (|> (chr $ fromIntegral x))
newtype StrictSink a = StrictS (Ptr Word8 -> Int -> Int -> IO (a,Ptr Word8,Int,Int))
instance Functor StrictSink where
fmap = liftM
instance Applicative StrictSink where
pure = return
(<*>) = ap
instance Monad StrictSink where
return x = StrictS $ \cstr pos max -> return (x,cstr,pos,max)
(StrictS f) >>= g = StrictS (\cstr pos max -> do
@ -155,13 +145,6 @@ instance ByteSink StrictSink where
newtype StrictSinkE a = StrictSinkE (StrictSink (Either EncodingException a))
instance Functor StrictSinkE where
fmap = liftM
instance Applicative StrictSinkE where
pure = return
(<*>) = ap
instance Monad StrictSinkE where
return = StrictSinkE . return . Right
(StrictSinkE s) >>= g = StrictSinkE $ do
@ -189,13 +172,6 @@ createStrict sink = createStrictWithLen sink 32
newtype StrictSinkExplicit a = StrictSinkExplicit (StrictSink (Either EncodingException a))
instance Functor StrictSinkExplicit where
fmap = liftM
instance Applicative StrictSinkExplicit where
pure = return
(<*>) = ap
instance Monad StrictSinkExplicit where
return = (StrictSinkExplicit).return.Right
(StrictSinkExplicit sink) >>= f
@ -218,4 +194,4 @@ instance ByteSink (ReaderT Handle IO) where
pushWord8 x = do
h <- ask
liftIO $ do
hPutChar h (chr $ fromIntegral x)
hPutChar h (chr $ fromIntegral x)

View File

@ -6,9 +6,7 @@ import Data.Encoding.Exception
import Data.Bits
import Data.Binary.Get
import Data.Char
import Data.Maybe
import Data.Word
import Control.Applicative as A
import Control.Monad.State
import Control.Monad.Identity
import Control.Monad.Reader
@ -21,9 +19,7 @@ import System.IO
class (Monad m,Throws DecodingException m) => ByteSource m where
sourceEmpty :: m Bool
fetchWord8 :: m Word8
-- 'fetchAhead act' should return the same thing 'act' does, but should
-- only consume input if 'act' returns a 'Just' value
fetchAhead :: m (Maybe a) -> m (Maybe a)
fetchAhead :: m a -> m a
fetchWord16be :: m Word16
fetchWord16be = do
w1 <- fetchWord8
@ -99,20 +95,7 @@ instance Throws DecodingException Get where
instance ByteSource Get where
sourceEmpty = isEmpty
fetchWord8 = getWord8
#if MIN_VERSION_binary(0,6,0)
fetchAhead act = (do
res <- act
case res of
Nothing -> A.empty
Just a -> return res
) <|> return Nothing
#else
fetchAhead act = do
res <- lookAhead act
case res of
Nothing -> return Nothing
Just a -> act
#endif
fetchAhead = lookAhead
fetchWord16be = getWord16be
fetchWord16le = getWord16le
fetchWord32be = getWord32be
@ -120,25 +103,25 @@ instance ByteSource Get where
fetchWord64be = getWord64be
fetchWord64le = getWord64le
fetchAheadState act = do
chs <- get
res <- act
when (isNothing res) (put chs)
return res
instance Throws DecodingException (State [Char]) where
throwException = throw
instance ByteSource (StateT [Char] Identity) where
instance ByteSource (State [Char]) where
sourceEmpty = gets null
fetchWord8 = do
chs <- get
case chs of
[] -> throwException UnexpectedEnd
[] -> throw UnexpectedEnd
c:cs -> do
put cs
return (fromIntegral $ ord c)
fetchAhead = fetchAheadState
fetchAhead act = do
chs <- get
res <- act
put chs
return res
#if MIN_VERSION_base(4,3,0)
#else
#ifndef MIN_VERSION_mtl(2,0,0,0)
instance Monad (Either DecodingException) where
return = Right
(Left err) >>= g = Left err
@ -154,21 +137,47 @@ instance ByteSource (StateT [Char] (Either DecodingException)) where
c:cs -> do
put cs
return (fromIntegral $ ord c)
fetchAhead = fetchAheadState
fetchAhead act = do
chs <- get
res <- act
put chs
return res
instance (Monad m,Throws DecodingException m) => ByteSource (StateT BS.ByteString m) where
instance Throws DecodingException (State BS.ByteString) where
throwException = throw
instance ByteSource (State BS.ByteString) where
sourceEmpty = gets BS.null
fetchWord8 = State (\str -> case BS.uncons str of
Nothing -> throw UnexpectedEnd
Just (c,cs) -> (c,cs))
fetchAhead act = do
str <- get
res <- act
put str
return res
instance ByteSource (StateT BS.ByteString (Either DecodingException)) where
sourceEmpty = gets BS.null
fetchWord8 = StateT (\str -> case BS.uncons str of
Nothing -> throwException UnexpectedEnd
Just (c,cs) -> return (c,cs))
fetchAhead = fetchAheadState
Nothing -> Left UnexpectedEnd
Just ns -> Right ns)
fetchAhead act = do
chs <- get
res <- act
put chs
return res
instance ByteSource (StateT LBS.ByteString (Either DecodingException)) where
sourceEmpty = gets LBS.null
fetchWord8 = StateT (\str -> case LBS.uncons str of
Nothing -> Left UnexpectedEnd
Just ns -> Right ns)
fetchAhead = fetchAheadState
fetchAhead act = do
chs <- get
res <- act
put chs
return res
instance ByteSource (ReaderT Handle IO) where
sourceEmpty = do
@ -183,5 +192,5 @@ instance ByteSource (ReaderT Handle IO) where
h <- ask
pos <- liftIO $ hGetPosn h
res <- act
when (isNothing res) (liftIO $ hSetPosn pos)
return res
liftIO $ hSetPosn pos
return res

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,7 @@ import Control.Monad.Identity
data EncodingException
= HasNoRepresentation Char -- ^ Thrown if a specific character
-- is not representable in an encoding.
deriving (Eq,Ord,Show,Read,Typeable)
deriving (Eq,Show,Typeable)
instance Exception EncodingException
@ -25,6 +25,6 @@ data DecodingException
| OutOfRange -- ^ the decoded value was out of the unicode range
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
-- character, but is illegal.
deriving (Eq,Ord,Show,Read,Typeable)
deriving (Eq,Show,Typeable)
instance Exception DecodingException

View File

@ -26,10 +26,11 @@ instance Encoding ISO2022JP where
encodeable _ c = encodeable ASCII c || encodeable JISX0201 c || encodeable JISX0208 c
instance ISO2022 ISO2022JP where
readEscape _ = fetchAhead $ do
w <- fetchWord8
readEscape _ = do
w <- fetchAhead fetchWord8
if w == 27
then (do
fetchWord8
w2 <- fetchWord8
w3 <- fetchWord8
case w2 of
@ -48,4 +49,4 @@ instance ISO2022 ISO2022JP where
| encodeable ASCII c = Just (DynEncoding ASCII,[27,40,66])
| encodeable JISX0201 c = Just (DynEncoding JISX0201,[27,40,74])
| encodeable JISX0208 c = Just (DynEncoding JISX0208,[27,36,66])
| otherwise = Nothing
| otherwise = Nothing

View File

@ -20,4 +20,4 @@ instance Encoding ISO88591 where
decodeChar _ = do
w <- fetchWord8
return (chr $ fromIntegral w)
encodeable _ c = c <= '\255'
encodeable _ c = c <= '\255'

View File

@ -57,4 +57,4 @@ instance Encoding KOI8R where
| otherwise = case lookup ch koi8rMap of
Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch)
encodeable _ c = member c koi8rMap
encodeable _ c = member c koi8rMap

View File

@ -57,4 +57,4 @@ instance Encoding KOI8U where
| otherwise = case lookup ch koi8uMap of
Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch)
encodeable _ c = member c koi8uMap
encodeable _ c = member c koi8uMap

View File

@ -61,38 +61,6 @@
# * Change mapping of 0xBD from U+2126 to its canonical
# decomposition, U+03A9.
0x00 0x0000 # NULL
0x01 0x0001 # START OF HEADING
0x02 0x0002 # START OF TEXT
0x03 0x0003 # END OF TEXT
0x04 0x0004 # END OF TRANSMISSION
0x05 0x0005 # ENQUIRY
0x06 0x0006 # ACKNOWLEDGE
0x07 0x0007 # BELL
0x08 0x0008 # BACKSPACE
0x09 0x0009 # HORIZONTAL TABULATION
0x0A 0x000A # LINE FEED
0x0B 0x000B # VERTICAL TABULATION
0x0C 0x000C # FORM FEED
0x0D 0x000D # CARRIAGE RETURN
0x0E 0x000E # SHIFT OUT
0x0F 0x000F # SHIFT IN
0x10 0x0010 # DATA LINK ESCAPE
0x11 0x0011 # DEVICE CONTROL ONE
0x12 0x0012 # DEVICE CONTROL TWO
0x13 0x0013 # DEVICE CONTROL THREE
0x14 0x0014 # DEVICE CONTROL FOUR
0x15 0x0015 # NEGATIVE ACKNOWLEDGE
0x16 0x0016 # SYNCHRONOUS IDLE
0x17 0x0017 # END OF TRANSMISSION BLOCK
0x18 0x0018 # CANCEL
0x19 0x0019 # END OF MEDIUM
0x1A 0x001A # SUBSTITUTE
0x1B 0x001B # ESCAPE
0x1C 0x001C # FILE SEPARATOR
0x1D 0x001D # GROUP SEPARATOR
0x1E 0x001E # RECORD SEPARATOR
0x1F 0x001F # UNIT SEPARATOR
0x20 0x0020 # SPACE
0x21 0x0021 # EXCLAMATION MARK
0x22 0x0022 # QUOTATION MARK
@ -188,7 +156,7 @@
0x7C 0x007C # VERTICAL LINE
0x7D 0x007D # RIGHT CURLY BRACKET
0x7E 0x007E # TILDE
0x7F 0x007F # DELETE
#
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA

View File

@ -157,4 +157,4 @@ preprocessMapping tp src trg mods name = do
," Just c -> return c"
," encodeChar _ c = mapEncode c "++mpname
," encodeable _ c = mapMember c "++mpname
]
]

View File

@ -8,7 +8,6 @@ import Data.List (find)
import Data.Char
import Text.XML.HaXml.XmlContent
import Text.XML.HaXml.OneOfN
import Text.XML.HaXml.Types
testFile :: FilePath -> IO CharacterMapping
testFile fp = fReadXml fp
@ -131,7 +130,7 @@ instance HTypeable CharacterMapping where
toHType x = Defined "characterMapping" [] []
instance XmlContent CharacterMapping where
toContents (CharacterMapping as a b c) =
[CElem (Elem (N "characterMapping") (toAttrs as) (maybe [] toContents a
[CElem (Elem "characterMapping" (toAttrs as) (maybe [] toContents a
++ toContents b
++ toContents c)) ()]
parseContents = do
@ -168,29 +167,29 @@ instance XmlAttributes CharacterMapping_Attrs where
instance XmlAttrType CharacterMapping_bidiOrder where
fromAttrToTyp n (n',v)
| N n==n' = translate (attr2str v)
| n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "logical" = Just CharacterMapping_bidiOrder_logical
translate "RTL" = Just CharacterMapping_bidiOrder_RTL
translate "LTR" = Just CharacterMapping_bidiOrder_LTR
translate _ = Nothing
toAttrFrTyp n CharacterMapping_bidiOrder_logical = Just (N n, str2attr "logical")
toAttrFrTyp n CharacterMapping_bidiOrder_RTL = Just (N n, str2attr "RTL")
toAttrFrTyp n CharacterMapping_bidiOrder_LTR = Just (N n, str2attr "LTR")
toAttrFrTyp n CharacterMapping_bidiOrder_logical = Just (n, str2attr "logical")
toAttrFrTyp n CharacterMapping_bidiOrder_RTL = Just (n, str2attr "RTL")
toAttrFrTyp n CharacterMapping_bidiOrder_LTR = Just (n, str2attr "LTR")
instance XmlAttrType CharacterMapping_combiningOrder where
fromAttrToTyp n (n',v)
| N n==n' = translate (attr2str v)
| n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "before" = Just CharacterMapping_combiningOrder_before
translate "after" = Just CharacterMapping_combiningOrder_after
translate _ = Nothing
toAttrFrTyp n CharacterMapping_combiningOrder_before = Just (N n, str2attr "before")
toAttrFrTyp n CharacterMapping_combiningOrder_after = Just (N n, str2attr "after")
toAttrFrTyp n CharacterMapping_combiningOrder_before = Just (n, str2attr "before")
toAttrFrTyp n CharacterMapping_combiningOrder_after = Just (n, str2attr "after")
instance XmlAttrType CharacterMapping_normalization where
fromAttrToTyp n (n',v)
| N n==n' = translate (attr2str v)
| n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "undetermined" = Just CharacterMapping_normalization_undetermined
translate "neither" = Just CharacterMapping_normalization_neither
@ -198,17 +197,17 @@ instance XmlAttrType CharacterMapping_normalization where
translate "NFD" = Just CharacterMapping_normalization_NFD
translate "NFC_NFD" = Just CharacterMapping_normalization_NFC_NFD
translate _ = Nothing
toAttrFrTyp n CharacterMapping_normalization_undetermined = Just (N n, str2attr "undetermined")
toAttrFrTyp n CharacterMapping_normalization_neither = Just (N n, str2attr "neither")
toAttrFrTyp n CharacterMapping_normalization_NFC = Just (N n, str2attr "NFC")
toAttrFrTyp n CharacterMapping_normalization_NFD = Just (N n, str2attr "NFD")
toAttrFrTyp n CharacterMapping_normalization_NFC_NFD = Just (N n, str2attr "NFC_NFD")
toAttrFrTyp n CharacterMapping_normalization_undetermined = Just (n, str2attr "undetermined")
toAttrFrTyp n CharacterMapping_normalization_neither = Just (n, str2attr "neither")
toAttrFrTyp n CharacterMapping_normalization_NFC = Just (n, str2attr "NFC")
toAttrFrTyp n CharacterMapping_normalization_NFD = Just (n, str2attr "NFD")
toAttrFrTyp n CharacterMapping_normalization_NFC_NFD = Just (n, str2attr "NFC_NFD")
instance XmlAttrType ByteSequence where
fromAttrToTyp n (n',v)
| N n==n' = parseByteSequence (attr2str v)
| n==n' = parseByteSequence (attr2str v)
| otherwise = Nothing
toAttrFrTyp n bs = Just (N n, str2attr $ show bs)
toAttrFrTyp n bs = Just (n, str2attr $ show bs)
parseByteSequence :: String -> Maybe ByteSequence
parseByteSequence str = do
@ -223,9 +222,9 @@ instance Show ByteSequence where
instance XmlAttrType CodePoints where
fromAttrToTyp n (n',v)
| N n==n' = parseCodePoints (attr2str v)
| n==n' = parseCodePoints (attr2str v)
| otherwise = Nothing
toAttrFrTyp n bs = Just (N n, str2attr $ show bs)
toAttrFrTyp n bs = Just (n, str2attr $ show bs)
parseCodePoints :: String -> Maybe CodePoints
parseCodePoints str = do
@ -242,7 +241,7 @@ instance HTypeable Stateful_siso where
toHType x = Defined "stateful_siso" [] []
instance XmlContent Stateful_siso where
toContents (Stateful_siso a b) =
[CElem (Elem (N "stateful_siso") [] (toContents a ++ toContents b)) ()]
[CElem (Elem "stateful_siso" [] (toContents a ++ toContents b)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["stateful_siso"]
; interior e $ return (Stateful_siso) `apply` parseContents
@ -253,7 +252,7 @@ instance HTypeable History where
toHType x = Defined "history" [] []
instance XmlContent History where
toContents (History a) =
[CElem (Elem (N "history") [] (toContents a)) ()]
[CElem (Elem "history" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["history"]
; interior e $ return (History) `apply` parseContents
@ -263,7 +262,7 @@ instance HTypeable Modified where
toHType x = Defined "modified" [] []
instance XmlContent Modified where
toContents (Modified as a) =
[CElem (Elem (N "modified") (toAttrs as) (toText a)) ()]
[CElem (Elem "modified" (toAttrs as) (toText a)) ()]
parseContents = do
{ e@(Elem _ as _) <- element ["modified"]
; interior e $ return (Modified (fromAttrs as))
@ -284,7 +283,7 @@ instance HTypeable Validity where
toHType x = Defined "validity" [] []
instance XmlContent Validity where
toContents (Validity a) =
[CElem (Elem (N "validity") [] (toContents a)) ()]
[CElem (Elem "validity" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["validity"]
; interior e $ return (Validity) `apply` parseContents
@ -294,7 +293,7 @@ instance HTypeable State where
toHType x = Defined "state" [] []
instance XmlContent State where
toContents as =
[CElem (Elem (N "state") (toAttrs as) []) ()]
[CElem (Elem "state" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["state"]
; return (fromAttrs as)
@ -320,7 +319,7 @@ instance HTypeable Assignments where
toHType x = Defined "assignments" [] []
instance XmlContent Assignments where
toContents (Assignments as a b c d e) =
[CElem (Elem (N "assignments") (toAttrs as) (concatMap toContents a ++
[CElem (Elem "assignments" (toAttrs as) (concatMap toContents a ++
concatMap toContents b ++ concatMap toContents c ++
concatMap toContents d ++
concatMap toContents e)) ()]
@ -346,7 +345,7 @@ instance HTypeable A where
toHType x = Defined "a" [] []
instance XmlContent A where
toContents as =
[CElem (Elem (N "a") (toAttrs as) []) ()]
[CElem (Elem "a" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["a"]
; return (fromAttrs as)
@ -369,7 +368,7 @@ instance HTypeable Fub where
toHType x = Defined "fub" [] []
instance XmlContent Fub where
toContents as =
[CElem (Elem (N "fub") (toAttrs as) []) ()]
[CElem (Elem "fub" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["fub"]
; return (fromAttrs as)
@ -397,7 +396,7 @@ instance HTypeable Fbu where
toHType x = Defined "fbu" [] []
instance XmlContent Fbu where
toContents as =
[CElem (Elem (N "fbu") (toAttrs as) []) ()]
[CElem (Elem "fbu" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["fbu"]
; return (fromAttrs as)
@ -419,7 +418,7 @@ instance HTypeable Sub1 where
toHType x = Defined "sub1" [] []
instance XmlContent Sub1 where
toContents as =
[CElem (Elem (N "sub1") (toAttrs as) []) ()]
[CElem (Elem "sub1" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["sub1"]
; return (fromAttrs as)
@ -441,7 +440,7 @@ instance HTypeable Range where
toHType x = Defined "range" [] []
instance XmlContent Range where
toContents as =
[CElem (Elem (N "range") (toAttrs as) []) ()]
[CElem (Elem "range" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["range"]
; return (fromAttrs as)
@ -471,7 +470,7 @@ instance HTypeable Iso2022 where
toHType x = Defined "iso2022" [] []
instance XmlContent Iso2022 where
toContents (Iso2022 a b) =
[CElem (Elem (N "iso2022") [] (maybe [] toContents a ++
[CElem (Elem "iso2022" [] (maybe [] toContents a ++
toContents b)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["iso2022"]
@ -483,7 +482,7 @@ instance HTypeable Default2022 where
toHType x = Defined "default2022" [] []
instance XmlContent Default2022 where
toContents as =
[CElem (Elem (N "default2022") (toAttrs as) []) ()]
[CElem (Elem "default2022" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["default2022"]
; return (fromAttrs as)
@ -501,7 +500,7 @@ instance HTypeable Escape where
toHType x = Defined "escape" [] []
instance XmlContent Escape where
toContents as =
[CElem (Elem (N "escape") (toAttrs as) []) ()]
[CElem (Elem "escape" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["escape"]
; return (fromAttrs as)
@ -521,7 +520,7 @@ instance HTypeable Si where
toHType x = Defined "si" [] []
instance XmlContent Si where
toContents (Si a) =
[CElem (Elem (N "si") [] (toContents a)) ()]
[CElem (Elem "si" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["si"]
; interior e $ return (Si) `apply` parseContents
@ -531,7 +530,7 @@ instance HTypeable So where
toHType x = Defined "so" [] []
instance XmlContent So where
toContents (So a) =
[CElem (Elem (N "so") [] (toContents a)) ()]
[CElem (Elem "so" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["so"]
; interior e $ return (So) `apply` parseContents
@ -541,7 +540,7 @@ instance HTypeable Ss2 where
toHType x = Defined "ss2" [] []
instance XmlContent Ss2 where
toContents (Ss2 a) =
[CElem (Elem (N "ss2") [] (toContents a)) ()]
[CElem (Elem "ss2" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["ss2"]
; interior e $ return (Ss2) `apply` parseContents
@ -551,7 +550,7 @@ instance HTypeable Ss3 where
toHType x = Defined "ss3" [] []
instance XmlContent Ss3 where
toContents (Ss3 a) =
[CElem (Elem (N "ss3") [] (toContents a)) ()]
[CElem (Elem "ss3" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["ss3"]
; interior e $ return (Ss3) `apply` parseContents
@ -561,7 +560,7 @@ instance HTypeable Designator where
toHType x = Defined "designator" [] []
instance XmlContent Designator where
toContents as =
[CElem (Elem (N "designator") (toAttrs as) []) ()]
[CElem (Elem "designator" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["designator"]
; return (fromAttrs as)

View File

@ -313,4 +313,4 @@ buildGroups ((c,bs):rest) = (EncodingGroup c end (bs:wrds)):buildGroups oth
group n all@((c,bs):rest)
| succ n == c = let (e,res,oth) = group c rest
in (e,bs:res,oth)
| otherwise = (n,[],all)
| otherwise = (n,[],all)

File diff suppressed because it is too large Load Diff

View File

@ -79,4 +79,4 @@ instance Encoding UTF16 where
return (c:cs)
Right bom -> decode bom
decode enc = untilM sourceEmpty (decodeChar enc)
encodeable _ c = (c > '\xDFFF' && c <= '\x10FFFF') || c < '\xD800'
encodeable _ c = (c > '\xDFFF' && c <= '\x10FFFF') || c < '\xD800'

View File

@ -44,4 +44,4 @@ instance Encoding UTF32 where
rest <- untilM sourceEmpty (decodeChar UTF32)
return ((chr $ fromIntegral ch):rest)
decode enc = untilM sourceEmpty (decodeChar enc)
encodeable _ _ = True
encodeable _ _ = True

View File

@ -25,4 +25,4 @@ member ind (StaticMap idx _) = lookup' 1
else case compare ind (idx!n) of
LT -> lookup' (n * 2)
GT -> lookup' ((n * 2) + 1)
EQ -> True
EQ -> True

View File

@ -1,4 +1,4 @@
{-# LANGUAGE MagicHash,FlexibleInstances,BangPatterns,CPP #-}
{-# LANGUAGE MagicHash,FlexibleInstances #-}
module Data.Static where
import GHC.Exts
@ -36,11 +36,7 @@ instance StaticElement Char where
instance StaticElement (Maybe Char) where
extract addr i = let !v = indexWord32OffAddr# addr i
#if __GLASGOW_HASKELL__ >= 708
in if isTrue# (eqWord# v (int2Word# 4294967295#)) -- -1 in Word32
#else
in if eqWord# v (int2Word# 4294967295#) -- -1 in Word32
#endif
then Nothing
else (if (I# (word2Int# v)) > 0x10FFFF
then error (show (I# (word2Int# v))++" is not a valid char ("++show (I# i)++")")
@ -68,4 +64,4 @@ instance StaticElement a => StaticElement (a,a,a,a) where
x3 = extract addr (i *# 4# +# 2#)
x4 = extract addr (i *# 4# +# 3#)
in (x1,x2,x3,x4)
gen (x1,x2,x3,x4) = gen x1 ++ gen x2 ++ gen x3 ++ gen x4
gen (x1,x2,x3,x4) = gen x1 ++ gen x2 ++ gen x3 ++ gen x4

27
LICENSE
View File

@ -1,27 +0,0 @@
Copyright (c) Daniel Wagner
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the author nor the names of his contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
!! This software is provided by the regents and contributors ``as is'' and
!! any express or implied warranties, including, but not limited to, the
!! implied warranties of merchantability and fitness for a particular purpose
!! are disclaimed. In no event shall the authors or contributors be liable
!! for any direct, indirect, incidental, special, exemplary, or consequential
!! damages (including, but not limited to, procurement of substitute goods
!! or services; loss of use, data, or profits; or business interruption)
!! however caused and on any theory of liability, whether in contract, strict
!! liability, or tort (including negligence or otherwise) arising in any way
!! out of the use of this software, even if advised of the possibility of
!! such damage.

View File

@ -1,48 +1,3 @@
Changes from 0.8 to 0.8.2
-------------------------
* Deprecated support for very old GHCs
* Updated cabal file to differentiate between build dependencies and setup dependencies
* Add upper and lower bounds to build dependencies
* Stack compatibility
Changes from 0.8 to 0.8.1
-------------------------
* Added the ShiftJIS and CP932 encodings
Changes from 0.7.0.2 to 0.8
---------------------------
* GHC-7.10/AMP compatibility
Changes from 0.7.0.1 to 0.7.0.2
-------------------------------
* Flesh out the MacOSRoman encoding, which was missing 33 code points
Changes from 0.7 to 0.7.0.1
---------------------------
* GHC-7.8 compatibility
Changes from 0.6.7 to 0.7
-------------------------
* the type of ByteSource's fetchAhead method changed to accomodate updates to the binary package
Changes from 0.6.5 to 0.6.7
---------------------------
* Skipped version 0.6.6 due to rogue upload on Hackage
* GHC-7 and HaXml-1.22 compatibility
* add -systemEncoding flag for Windows builds
Changes from 0.6.4 to 0.6.5
---------------------------
* Make package work with >=base-4.3.0.0 and mtl-2
Changes from 0.6.3 to 0.6.4
---------------------------

View File

@ -1,14 +0,0 @@
On each release:
* update CHANGELOG
* bump the version number in the .cabal file (including in the "this" repository spec)
* cabal upload a release tarball
* darcs tag with the version number
To build a release tarball:
cabal configure
./dist/setup/setup sdist
tar xf dist/encoding-version.tar.gz
rm -r encoding-version/dist
tar --format=ustar -czf dist/encoding-version.tar.gz encoding-version
rm -r encoding-version
cabal upload dist/encoding-version.tar

View File

@ -131,19 +131,12 @@ interact f = do
line <- hGetLine stdin
hPutStrLn stdout (f line)
#ifdef SYSTEM_ENCODING
foreign import ccall "system_encoding.h get_system_encoding"
get_system_encoding :: IO CString
#endif
-- | Returns the encoding used on the current system. Currently only supported
-- on Linux-alikes.
-- | Returns the encoding used on the current system.
getSystemEncoding :: IO DynEncoding
getSystemEncoding = do
#ifdef SYSTEM_ENCODING
enc <- get_system_encoding
str <- peekCString enc
return $ encodingFromString str
#else
error "getSystemEncoding is not supported on this platform"
#endif
return $ encodingFromString str

View File

@ -1,59 +1,37 @@
Name: encoding
Version: 0.8.2
Version: 0.6.4
Author: Henning Günther
Maintainer: daniel@wagner-home.com
Maintainer: h.guenther@tu-bs.de
License: BSD3
License-File: LICENSE
Synopsis: A library for various character encodings
Description:
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunatly, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Category: Codec
Homepage: http://code.haskell.org/encoding/
Cabal-Version: >=1.8
Cabal-Version: >=1.2
Build-Type: Custom
Extra-Source-Files:
CHANGELOG
NEWS
Data/Encoding/Preprocessor/Mapping.hs
Data/Encoding/Preprocessor/XMLMapping.hs
Data/Encoding/Preprocessor/XMLMappingBuilder.hs
Data/CharMap/Builder.hs
Data/Array/Static/Builder.hs
Data/Map/Static/Builder.hs
system_encoding.h
system_encoding.c
Flag systemEncoding
description: Provide the getSystemEncoding action to query the locale.
Source-Repository head
Type: git
Location: http://github.com/dmwit/encoding
Source-Repository this
Type: git
Location: http://github.com/dmwit/encoding
Tag: 0.8.2
Custom-Setup
Setup-Depends: base >=3 && <5,
Cabal >=1.24 && <1.25,
containers,
filepath,
ghc-prim,
HaXml >=1.22 && <1.26
Flag splitBase
description: Choose the new smaller, split-up base package.
Flag newGHC
description: Use ghc version > 6.10
Library
Build-Depends: array >=0.4 && <0.6,
base >=4 && <5,
binary >=0.7 && <0.10,
bytestring >=0.9 && <0.11,
containers >=0.4 && <0.6,
extensible-exceptions >=0.1 && <0.2,
ghc-prim >=0.3 && <0.6,
mtl >=2.0 && <2.3,
regex-compat >=0.71 && <0.95
Extensions: CPP
if flag(splitBase)
if flag(newGHC)
Build-Depends: bytestring, base >= 3 && < 5, binary, mtl, containers, extensible-exceptions, array, regex-compat, ghc-prim, ghc >= 6.10, HaXml >= 1.19
else
Build-Depends: bytestring, base >= 3 && < 5, binary, mtl, containers, extensible-exceptions, array, regex-compat, ghc < 6.10, HaXml >= 1.19
else
Build-Depends: base < 3, binary, extensible-exceptions, HaXml >= 1.19
Exposed-Modules:
Data.Encoding
@ -99,7 +77,6 @@ Library
Data.Encoding.JISX0212
Data.Encoding.ISO2022
Data.Encoding.ISO2022JP
Data.Encoding.ShiftJIS
Data.Encoding.CP437
Data.Encoding.CP737
Data.Encoding.CP775
@ -116,7 +93,6 @@ Library
Data.Encoding.CP866
Data.Encoding.CP869
Data.Encoding.CP874
Data.Encoding.CP932
System.IO.Encoding
Other-Modules:
Data.Encoding.Base
@ -124,26 +100,9 @@ Library
Data.Map.Static
Data.Static
Data.CharMap
if impl(ghc >= 7.10)
GHC-Options: -fno-warn-tabs
if flag(systemEncoding)
Includes:
system_encoding.h
Install-Includes:
system_encoding.h
C-Sources:
system_encoding.c
CPP-Options: -DSYSTEM_ENCODING
test-suite encoding-test
type: exitcode-stdio-1.0
hs-source-dirs: tests
main-is: Main.hs
other-modules: Test.Tester
, Test.Tests
build-depends: base
, bytestring
, encoding
, HUnit
, QuickCheck
ghc-options: -threaded -rtsopts -with-rtsopts=-N
Includes:
system_encoding.h
Install-Includes:
system_encoding.h
C-Sources:
system_encoding.c

View File

@ -1,66 +0,0 @@
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
# resolver: ghcjs-0.1.0_ghc-7.10.2
# resolver:
# name: custom-snapshot
# location: "./custom-snapshot.yaml"
resolver: lts-8.22
# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# extra-dep: true
# subdirs:
# - auto-update
# - wai
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
packages:
- '.'
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps: []
# Override default flag values for local packages and extra-deps
flags: {}
# Extra package databases containing global packages
extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.4"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor

View File

@ -1,18 +0,0 @@
import Control.Monad
import Test.HUnit
import Test.Tests
hunitTests =
[ ("utf8Tests", utf8Tests)
, ("utf16Tests", utf16Tests)
, ("punycodeTests", punycodeTests)
, ("isoTests", isoTests)
, ("jisTests", jisTests)
, ("gb18030Tests", gb18030Tests)
]
main = do
identityTests
forM_ hunitTests $ \(name, test) -> do
putStrLn $ "running " ++ name
runTestTT test >>= print

View File

@ -1,66 +0,0 @@
{-# LANGUAGE NoMonomorphismRestriction #-}
import Control.Applicative
import Control.Monad
import Data.ByteString (ByteString, pack)
import Data.Encoding
import Data.Word
import System.Random
-- for example:
import Data.Encoding.ISO2022JP
--main = generate ISO2022JP
main = test ISO2022JP
-- BEWARE! These things are _very_ memory-hungry if you don't compile with optimizations.
-- end example
randomRepeat f max = randomRIO (0, max) >>= flip replicateM f
randomString = randomRepeat randomIO
randomGoodString = randomRepeat . randomGoodChar
randomGoodChar f = let
good = filter f [minBound..maxBound]
n = length good
in do
i <- randomRIO (0, n-1)
return (good !! i)
generate enc = do
let filename = show enc ++ ".regression"
randomGood = randomGoodString (encodeable enc)
shortMixedEnc <- replicateM 300 ( randomString 10)
shortGoodEnc <- replicateM 30 ( randomGood 10)
longMixedEnc <- replicateM 300 ( randomString 1000)
longGoodEnc <- replicateM 3000 ( randomGood 1000)
shortDec <- replicateM 300 (pack <$> randomString 10)
longDec <- replicateM 3000 (pack <$> randomString 1000)
writeFile filename (show
[ (s, encodeStrictByteStringExplicit enc s)
| ss <- [shortMixedEnc, shortGoodEnc, longMixedEnc, longGoodEnc]
, s <- ss
] ++ "\n")
appendFile filename (show
[ (bs, decodeStrictByteStringExplicit enc bs)
| bss <- [shortDec, longDec]
, bs <- bss
] ++ "\n")
complain action input expected actual = when (expected /= actual) . putStrLn . concat $
[ "when "
, pad action
, show input
, "\n"
, pad "expected"
, show expected
, "\n"
, pad "but got"
, show actual
]
where
size = maximum . map length $ [action, "expected", "but got"]
pad s = s ++ replicate (size - length s) ' ' ++ ": "
test enc = do
[encoded_, decoded_] <- lines <$> readFile (show enc ++ ".regression")
let encoded = read encoded_
decoded = read decoded_
forM_ encoded $ \(s , correctEncoding) -> complain "encoding" s correctEncoding (encodeStrictByteStringExplicit enc s)
forM_ decoded $ \(bs, correctDecoding) -> complain "decoding" bs correctDecoding (decodeStrictByteStringExplicit enc bs)

View File

@ -53,6 +53,9 @@ charGen = let
threeByte = choose (0x010000,0x10FFFF) >>= return.chr
in frequency [(40,ascii),(30,oneByte),(20,twoByte),(10,threeByte)]
instance Arbitrary Word8 where
arbitrary = choose (0x00,0xFF::Int) >>= return.fromIntegral
quickCheckEncoding :: Encoding enc => enc -> IO ()
quickCheckEncoding e = do
quickCheck (encodingIdentity e)
@ -76,4 +79,4 @@ decodingIdentity e wrd
Right res' -> property (bstr==res')
where
bstr = BS.pack wrd
decoded = decodeStrictByteStringExplicit e bstr
decoded = decodeStrictByteStringExplicit e bstr

View File

@ -282,4 +282,4 @@ gb18030Tests = TestList $ map test $
{-big5Tests :: Test
big5Tests = test (EncodingFileTest BIG5 "data/BIG5" "data/BIG5.UTF-8")-}