Changed UTF16 to support big/little endian encoding
darcs-hash:20071105213105-a4fee-f3d5a91bee421e5e6c2d1d44787a421bb2d07b72
This commit is contained in:
parent
cb81698b7e
commit
6b82f1bb71
@ -11,16 +11,19 @@ import Data.Int
|
|||||||
import Data.Word
|
import Data.Word
|
||||||
import Data.ByteString
|
import Data.ByteString
|
||||||
import qualified Data.ByteString.Lazy as LBS
|
import qualified Data.ByteString.Lazy as LBS
|
||||||
import Data.ByteString.Base(w2c,c2w)
|
|
||||||
import Prelude hiding (length)
|
import Prelude hiding (length)
|
||||||
import Control.Exception
|
import Control.Exception
|
||||||
import Data.Dynamic (toDyn)
|
import Data.Dynamic (toDyn)
|
||||||
|
|
||||||
data UTF16 = UTF16 deriving Show
|
data UTF16
|
||||||
|
= UTF16
|
||||||
|
| UTF16BE
|
||||||
|
| UTF16LE
|
||||||
|
deriving (Eq,Show)
|
||||||
|
|
||||||
utf16enc :: (EncodeState,String) -> Maybe (Word8,(EncodeState,String))
|
utf16enc :: Bool -> (EncodeState,String) -> Maybe (Word8,(EncodeState,String))
|
||||||
utf16enc (Done,[]) = Nothing
|
utf16enc _ (Done,[]) = Nothing
|
||||||
utf16enc (Done,x:xs)
|
utf16enc True (Done,x:xs)
|
||||||
| n<=0x0000FFFF = Just
|
| n<=0x0000FFFF = Just
|
||||||
(fromIntegral $ n `shiftR` 8
|
(fromIntegral $ n `shiftR` 8
|
||||||
,(Put1 (fromIntegral $ n),xs))
|
,(Put1 (fromIntegral $ n),xs))
|
||||||
@ -34,9 +37,23 @@ utf16enc (Done,x:xs)
|
|||||||
where
|
where
|
||||||
n = ord x
|
n = ord x
|
||||||
n' = n - 0x10000
|
n' = n - 0x10000
|
||||||
utf16enc (Put3 w1 w2 w3,xs) = Just (w1,(Put2 w2 w3,xs))
|
utf16enc False (Done,x:xs)
|
||||||
utf16enc (Put2 w1 w2,xs) = Just (w1,(Put1 w2,xs))
|
| n<=0x0000FFFF = Just
|
||||||
utf16enc (Put1 w1,xs) = Just (w1,(Done,xs))
|
(fromIntegral $ n
|
||||||
|
,(Put1 (fromIntegral $ n `shiftR` 8),xs))
|
||||||
|
| n<=0x0010FFFF = Just
|
||||||
|
(fromIntegral n'
|
||||||
|
,(Put3 (fromIntegral $
|
||||||
|
0xDC .|. ((n' `shiftR` 8) .&. 0x03))
|
||||||
|
(fromIntegral $ (n' `shiftR` 10))
|
||||||
|
(fromIntegral $ 0xD8 .|. (n' `shiftR` 18)),xs))
|
||||||
|
| otherwise = throwDyn $ HasNoRepresentation x
|
||||||
|
where
|
||||||
|
n = ord x
|
||||||
|
n' = n - 0x10000
|
||||||
|
utf16enc _ (Put3 w1 w2 w3,xs) = Just (w1,(Put2 w2 w3,xs))
|
||||||
|
utf16enc _ (Put2 w1 w2,xs) = Just (w1,(Put1 w2,xs))
|
||||||
|
utf16enc _ (Put1 w1,xs) = Just (w1,(Done,xs))
|
||||||
|
|
||||||
{-# SPECIALIZE utf16dec :: Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,Int) #-}
|
{-# SPECIALIZE utf16dec :: Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,Int) #-}
|
||||||
{-# SPECIALIZE utf16dec :: Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,Int64) #-}
|
{-# SPECIALIZE utf16dec :: Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,Int64) #-}
|
||||||
@ -54,8 +71,12 @@ utf16dec be s1 s2 s3 s4
|
|||||||
(w1,w2,w3,w4) = if be then (s1,s2,s3,s4) else (s2,s1,s4,s3)
|
(w1,w2,w3,w4) = if be then (s1,s2,s3,s4) else (s2,s1,s4,s3)
|
||||||
|
|
||||||
instance Encoding UTF16 where
|
instance Encoding UTF16 where
|
||||||
encode _ str = unfoldr utf16enc (Put2 0xFE 0xFF,str)
|
encode enc str = unfoldr (utf16enc (enc/=UTF16LE)) (case enc of
|
||||||
encodeLazy _ str = LBS.unfoldr utf16enc (Put2 0xFE 0xFF,str)
|
UTF16 -> Put2 0xFE 0xFF
|
||||||
|
_ -> Done,str)
|
||||||
|
encodeLazy enc str = LBS.unfoldr (utf16enc (enc/=UTF16LE)) (case enc of
|
||||||
|
UTF16 -> Put2 0xFE 0xFF
|
||||||
|
_ -> Done,str)
|
||||||
encodable _ c = ord c <= 0x0010FFFF
|
encodable _ c = ord c <= 0x0010FFFF
|
||||||
decode _ str = case findByteOrder str of
|
decode _ str = case findByteOrder str of
|
||||||
Nothing -> decode' True 0
|
Nothing -> decode' True 0
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user