JIS X 0208 encoding
darcs-hash:20090223182459-a4fee-98ced8f8b7bac594dc6510eeecb6bea8c51a6090
This commit is contained in:
parent
0398f66695
commit
b95bfe9be4
@ -73,6 +73,7 @@ import Data.Encoding.KOI8U
|
||||
import Data.Encoding.GB18030
|
||||
import Data.Encoding.MacOSRoman
|
||||
import Data.Encoding.JISX0201
|
||||
import Data.Encoding.JISX0208
|
||||
import Data.Char
|
||||
import Text.Regex
|
||||
|
||||
@ -312,6 +313,8 @@ encodingFromStringMaybe codeName = case (normalizeEncoding codeName) of
|
||||
"macintosh" -> Just $ DynEncoding MacOSRoman
|
||||
-- JIS X 0201
|
||||
"jis_x_0201" -> Just $ DynEncoding JISX0201
|
||||
-- JIS x 0208
|
||||
"jis_x_0208" -> Just $ DynEncoding JISX0208
|
||||
-- defaults to nothing
|
||||
_ -> Nothing
|
||||
where
|
||||
|
||||
@ -37,9 +37,24 @@ encodeWithMap mp c = case Map.lookup c mp of
|
||||
Nothing -> throwException $ HasNoRepresentation c
|
||||
Just v -> pushWord8 v
|
||||
|
||||
encodeWithMap2 :: ByteSink m => Map Char (Word8,Word8) -> Char -> m ()
|
||||
encodeWithMap2 mp c = case Map.lookup c mp of
|
||||
Nothing -> throwException $ HasNoRepresentation c
|
||||
Just (w1,w2) -> do
|
||||
pushWord8 w1
|
||||
pushWord8 w2
|
||||
|
||||
decodeWithArray :: ByteSource m => Array Word8 (Maybe Char) -> m Char
|
||||
decodeWithArray arr = do
|
||||
w <- fetchWord8
|
||||
case arr!w of
|
||||
Nothing -> throwException $ IllegalCharacter w
|
||||
Just c -> return c
|
||||
|
||||
decodeWithArray2 :: ByteSource m => Array (Word8,Word8) (Maybe Char) -> m Char
|
||||
decodeWithArray2 arr = do
|
||||
w1 <- fetchWord8
|
||||
w2 <- fetchWord8
|
||||
case arr!(w1,w2) of
|
||||
Nothing -> throwException $ IllegalCharacter w1
|
||||
Just c -> return c
|
||||
@ -2,6 +2,7 @@
|
||||
module Data.Encoding.Helper.Template where
|
||||
|
||||
import Data.Encoding.Base
|
||||
import Data.Bits
|
||||
import Data.Char
|
||||
import Data.Maybe (mapMaybe)
|
||||
import Data.Map as Map (fromList,lookup)
|
||||
@ -10,66 +11,112 @@ import Language.Haskell.TH
|
||||
|
||||
makeISOInstance :: String -> FilePath -> Q [Dec]
|
||||
makeISOInstance name file = do
|
||||
let rname = mkName name
|
||||
trans <- runIO (readTranslation file)
|
||||
mp <- encodingMap (validTranslations trans)
|
||||
arr <- decodingArray (fillTranslations trans)
|
||||
return [ DataD [] rname [] [NormalC rname []] [''Show]
|
||||
, InstanceD [] (AppT (ConT ''Encoding) (ConT rname))
|
||||
[FunD 'encodeChar
|
||||
[Clause [WildP] (NormalB $ AppE (VarE 'encodeWithMap) (VarE $ mkName "mp"))
|
||||
[ValD (VarP $ mkName "mp") (NormalB mp) []]
|
||||
]
|
||||
,FunD 'decodeChar
|
||||
[Clause [WildP] (NormalB $ AppE (VarE 'decodeWithArray) (VarE $ mkName "arr"))
|
||||
[ValD (VarP $ mkName "arr") (NormalB arr) []]
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
arr <- decodingArray (fillTranslations 0 255 trans)
|
||||
return $ encodingInstance 'encodeWithMap 'decodeWithArray name mp arr
|
||||
|
||||
makeJISInstance :: String -> FilePath -> Q [Dec]
|
||||
makeJISInstance name file = do
|
||||
trans <- runIO (readJISTranslation file)
|
||||
mp <- encodingMap2 (validTranslations trans)
|
||||
arr <- decodingArray2 (fillTranslations (0x21,0x21) (0x7E,0x7E) trans)
|
||||
return $ encodingInstance 'encodeWithMap2 'decodeWithArray2 name mp arr
|
||||
|
||||
encodingInstance :: Name -> Name -> String -> Exp -> Exp -> [Dec]
|
||||
encodingInstance enc dec name mp arr
|
||||
= [ DataD [] rname [] [NormalC rname []] [''Show]
|
||||
, InstanceD [] (AppT (ConT ''Encoding) (ConT rname))
|
||||
[FunD 'encodeChar
|
||||
[Clause [WildP] (NormalB $ AppE (VarE enc) (VarE rmp))
|
||||
[ValD (VarP rmp) (NormalB mp) []]
|
||||
]
|
||||
,FunD 'decodeChar
|
||||
[Clause [WildP] (NormalB $ AppE (VarE dec) (VarE rarr))
|
||||
[ValD (VarP rarr) (NormalB arr) []]
|
||||
]
|
||||
]
|
||||
]
|
||||
where
|
||||
rname = mkName name
|
||||
rarr = mkName "arr"
|
||||
rmp = mkName "mp"
|
||||
|
||||
createCharArray :: [(Integer,Maybe Char)] -> Integer -> Integer -> Q Exp
|
||||
createCharArray lst = createArray (map (\(x,y) -> (x,case y of
|
||||
Nothing -> ConE 'Nothing
|
||||
Just c -> AppE (ConE 'Just) (LitE $ CharL c))
|
||||
) lst)
|
||||
createCharArray lst f t = createArray (map (\(x,y) ->
|
||||
(LitE $ IntegerL x,mbCharToExp y)
|
||||
) lst) (LitE $ IntegerL f) (LitE $ IntegerL t)
|
||||
|
||||
createCharArray2 :: [((Integer,Integer),Maybe Char)] -> (Integer,Integer) -> (Integer,Integer) -> Q Exp
|
||||
createCharArray2 lst (f1,f2) (t1,t2)
|
||||
= createArray (map (\((x1,x2),y) ->
|
||||
(TupE [integerExp x1,integerExp x2],mbCharToExp y)
|
||||
) lst)
|
||||
(TupE [integerExp f1,integerExp f2])
|
||||
(TupE [integerExp t1,integerExp t2])
|
||||
|
||||
createArray :: [(Integer,Exp)] -> Integer -> Integer -> Q Exp
|
||||
createArray lst from to = return $ AppE
|
||||
(AppE
|
||||
(VarE 'array)
|
||||
(TupE [LitE $ IntegerL from,LitE $ IntegerL to]))
|
||||
(ListE [ TupE [LitE $ IntegerL x,y]
|
||||
| (x,y) <- lst ])
|
||||
integerExp :: Integer -> Exp
|
||||
integerExp i = LitE $ IntegerL i
|
||||
|
||||
mbCharToExp :: Maybe Char -> Exp
|
||||
mbCharToExp Nothing = ConE 'Nothing
|
||||
mbCharToExp (Just c) = AppE (ConE 'Just) (LitE $ CharL c)
|
||||
|
||||
createArray :: [(Exp,Exp)] -> Exp -> Exp -> Q Exp
|
||||
createArray lst from to
|
||||
= return $ AppE
|
||||
(AppE
|
||||
(VarE 'array)
|
||||
(TupE [from,to]))
|
||||
(ListE [TupE [x,y] | (x,y) <- lst])
|
||||
|
||||
decodingArray :: [(Integer,Maybe Char)] -> Q Exp
|
||||
decodingArray trans = createCharArray trans 0 255
|
||||
|
||||
decodingArray2 :: [((Integer,Integer),Maybe Char)] -> Q Exp
|
||||
decodingArray2 trans = createCharArray2 trans (0x21,0x21) (0x7E,0x7E)
|
||||
|
||||
encodingMap :: [(Integer,Char)] -> Q Exp
|
||||
encodingMap trans = return $ AppE
|
||||
(VarE 'fromList)
|
||||
(ListE [ TupE [LitE $ CharL to,LitE $ IntegerL from]
|
||||
| (from,to) <- trans])
|
||||
|
||||
encodingMap2 :: [((Integer,Integer),Char)] -> Q Exp
|
||||
encodingMap2 trans = return $ AppE
|
||||
(VarE 'fromList)
|
||||
(ListE [ TupE [LitE $ CharL to,TupE [integerExp f1,integerExp f2]]
|
||||
| ((f1,f2),to) <- trans])
|
||||
|
||||
readTranslation :: FilePath -> IO [(Integer,Maybe Char)]
|
||||
readTranslation file = do
|
||||
cont <- readFile file
|
||||
return $ mapMaybe (\ln -> case ln of
|
||||
[] -> Nothing
|
||||
('#':xs) -> Nothing
|
||||
_ -> case words ln of
|
||||
(src:"#UNDEFINED":_) -> Just (read src,Nothing)
|
||||
(src:trg:_) -> Just (read src,Just $ chr $ read trg)
|
||||
_ -> Nothing
|
||||
) (lines cont)
|
||||
cont <- readFile file
|
||||
return $ mapMaybe (\ln -> case ln of
|
||||
[src] -> Just (src,Nothing)
|
||||
[src,trg] -> Just (src,Just $ chr $ fromIntegral trg)
|
||||
_ -> Nothing) (parseTranslationTable cont)
|
||||
|
||||
fillTranslations :: [(Integer,Maybe Char)] -> [(Integer,Maybe Char)]
|
||||
fillTranslations = fillTranslations' (-1)
|
||||
readJISTranslation :: FilePath -> IO [((Integer,Integer),Maybe Char)]
|
||||
readJISTranslation file = do
|
||||
cont <- readFile file
|
||||
return $ mapMaybe (\ln -> case ln of
|
||||
[_,src] -> Just ((src `shiftR` 8,src .&. 0xFF),Nothing)
|
||||
[_,src,trg] -> Just ((src `shiftR` 8,src .&. 0xFF),Just $ chr $ fromIntegral trg)
|
||||
_ -> Nothing) (parseTranslationTable cont)
|
||||
|
||||
parseTranslationTable :: String -> [[Integer]]
|
||||
parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont))
|
||||
|
||||
fillTranslations :: Ix a => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)]
|
||||
fillTranslations f t = merge (range (f,t))
|
||||
where
|
||||
fillTranslations' n ((n',c):cs) = (map (\i -> (i,Nothing)) [n+1..n'-1])++((n',c):fillTranslations' n' cs)
|
||||
fillTranslations' n [] = map (\i -> (i,Nothing)) [n+1..255]
|
||||
merge xs [] = map (\x -> (x,Nothing)) xs
|
||||
merge [] _ = error "Data.Encoding.Helper.Template.fillTranslations: Character translations out of range"
|
||||
merge (x:xs) (y:ys) = if x < fst y
|
||||
then (x,Nothing):(merge xs (y:ys))
|
||||
else y:(merge xs ys)
|
||||
|
||||
validTranslations :: [(Integer,Maybe Char)] -> [(Integer,Char)]
|
||||
validTranslations :: [(a,Maybe Char)] -> [(a,Char)]
|
||||
validTranslations = mapMaybe (\(n,mc) -> case mc of
|
||||
Nothing -> Nothing
|
||||
Just c -> Just (n,c))
|
||||
6
Data/Encoding/JISX0208.hs
Normal file
6
Data/Encoding/JISX0208.hs
Normal file
@ -0,0 +1,6 @@
|
||||
{-# LANGUAGE TemplateHaskell #-}
|
||||
module Data.Encoding.JISX0208 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeJISInstance)
|
||||
|
||||
$( makeJISInstance "JISX0208" "jis0208.txt" )
|
||||
@ -17,6 +17,7 @@ import Data.Encoding.ISO885910
|
||||
import Data.Encoding.ISO885911
|
||||
import Data.Encoding.ISO885913
|
||||
import Data.Encoding.ISO885914
|
||||
import Data.Encoding.JISX0208
|
||||
import Data.Encoding.BootString
|
||||
import Test.HUnit
|
||||
import Test.QuickCheck hiding (test)
|
||||
@ -227,4 +228,16 @@ punycodeTests = TestList $ map test $
|
||||
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
|
||||
"foobarbaz"-}
|
||||
]
|
||||
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
|
||||
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
|
||||
|
||||
isoTests :: Test
|
||||
isoTests = TestList $ map test $
|
||||
[EncodingTest ISO88592 "\x104\x2D8\x141\xA4\x13D\x15A\xA7\xA8\x160\x15E\x164\x179\xAD\x17D\x17B\xB0\x105\x2DB\x142\xB4\x13E\x15B\x2C7\xB8\x161\x15F"
|
||||
[0xA1..0xBA]
|
||||
]
|
||||
|
||||
jisTests :: Test
|
||||
jisTests = TestList $ map test $
|
||||
[EncodingTest JISX0208 "\x4E9C"
|
||||
[0x30,0x21]
|
||||
]
|
||||
@ -61,4 +61,5 @@ Library
|
||||
Data.Encoding.BootString
|
||||
Data.Encoding.MacOSRoman
|
||||
Data.Encoding.JISX0201
|
||||
Data.Encoding.JISX0208
|
||||
System.IO.Encoding
|
||||
Loading…
Reference in New Issue
Block a user