Changed UTF16 to support big/little endian encoding

darcs-hash:20071105213105-a4fee-f3d5a91bee421e5e6c2d1d44787a421bb2d07b72
This commit is contained in:
Henning Guenther 2007-11-05 13:31:05 -08:00
parent cb81698b7e
commit 6b82f1bb71

View File

@ -11,16 +11,19 @@ import Data.Int
import Data.Word import Data.Word
import Data.ByteString import Data.ByteString
import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Lazy as LBS
import Data.ByteString.Base(w2c,c2w)
import Prelude hiding (length) import Prelude hiding (length)
import Control.Exception import Control.Exception
import Data.Dynamic (toDyn) import Data.Dynamic (toDyn)
data UTF16 = UTF16 deriving Show data UTF16
= UTF16
| UTF16BE
| UTF16LE
deriving (Eq,Show)
utf16enc :: (EncodeState,String) -> Maybe (Word8,(EncodeState,String)) utf16enc :: Bool -> (EncodeState,String) -> Maybe (Word8,(EncodeState,String))
utf16enc (Done,[]) = Nothing utf16enc _ (Done,[]) = Nothing
utf16enc (Done,x:xs) utf16enc True (Done,x:xs)
| n<=0x0000FFFF = Just | n<=0x0000FFFF = Just
(fromIntegral $ n `shiftR` 8 (fromIntegral $ n `shiftR` 8
,(Put1 (fromIntegral $ n),xs)) ,(Put1 (fromIntegral $ n),xs))
@ -34,9 +37,23 @@ utf16enc (Done,x:xs)
where where
n = ord x n = ord x
n' = n - 0x10000 n' = n - 0x10000
utf16enc (Put3 w1 w2 w3,xs) = Just (w1,(Put2 w2 w3,xs)) utf16enc False (Done,x:xs)
utf16enc (Put2 w1 w2,xs) = Just (w1,(Put1 w2,xs)) | n<=0x0000FFFF = Just
utf16enc (Put1 w1,xs) = Just (w1,(Done,xs)) (fromIntegral $ n
,(Put1 (fromIntegral $ n `shiftR` 8),xs))
| n<=0x0010FFFF = Just
(fromIntegral n'
,(Put3 (fromIntegral $
0xDC .|. ((n' `shiftR` 8) .&. 0x03))
(fromIntegral $ (n' `shiftR` 10))
(fromIntegral $ 0xD8 .|. (n' `shiftR` 18)),xs))
| otherwise = throwDyn $ HasNoRepresentation x
where
n = ord x
n' = n - 0x10000
utf16enc _ (Put3 w1 w2 w3,xs) = Just (w1,(Put2 w2 w3,xs))
utf16enc _ (Put2 w1 w2,xs) = Just (w1,(Put1 w2,xs))
utf16enc _ (Put1 w1,xs) = Just (w1,(Done,xs))
{-# SPECIALIZE utf16dec :: Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,Int) #-} {-# SPECIALIZE utf16dec :: Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,Int) #-}
{-# SPECIALIZE utf16dec :: Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,Int64) #-} {-# SPECIALIZE utf16dec :: Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,Int64) #-}
@ -54,8 +71,12 @@ utf16dec be s1 s2 s3 s4
(w1,w2,w3,w4) = if be then (s1,s2,s3,s4) else (s2,s1,s4,s3) (w1,w2,w3,w4) = if be then (s1,s2,s3,s4) else (s2,s1,s4,s3)
instance Encoding UTF16 where instance Encoding UTF16 where
encode _ str = unfoldr utf16enc (Put2 0xFE 0xFF,str) encode enc str = unfoldr (utf16enc (enc/=UTF16LE)) (case enc of
encodeLazy _ str = LBS.unfoldr utf16enc (Put2 0xFE 0xFF,str) UTF16 -> Put2 0xFE 0xFF
_ -> Done,str)
encodeLazy enc str = LBS.unfoldr (utf16enc (enc/=UTF16LE)) (case enc of
UTF16 -> Put2 0xFE 0xFF
_ -> Done,str)
encodable _ c = ord c <= 0x0010FFFF encodable _ c = ord c <= 0x0010FFFF
decode _ str = case findByteOrder str of decode _ str = case findByteOrder str of
Nothing -> decode' True 0 Nothing -> decode' True 0