diff --git a/Data/Encoding/Helper/Data.hs b/Data/Encoding/Helper/Data.hs deleted file mode 100644 index fede430..0000000 --- a/Data/Encoding/Helper/Data.hs +++ /dev/null @@ -1,116 +0,0 @@ -{- This module is used to create a haskell file with static address-arrays in it -} - -module Data.Encoding.Helper.Data where - -import Data.Char (ord) -import Data.Word (Word8) -import Data.Ord (comparing) -import Data.Bits (shiftR) -import Data.List (sortBy,genericLength) -import Data.Encoding.Helper.XML - -linear :: Word8 -> Word8 -> Word8 -> Word8 -> Int -linear w1 w2 w3 w4 - = (fromIntegral (w4-0x30)) - + (fromIntegral (w3-0x81))*10 - + (fromIntegral (w2-0x30))*1260 - + (fromIntegral (w1-0x81))*12600 - -linear2 :: Word8 -> Word8 -> Int -linear2 w1 w2 = (fromIntegral (w2 - (if w2<=0x7E - then 0x40 - else 0x41))) - + (fromIntegral (w1-0x81))*190 - -createStandardModule :: IO () -createStandardModule = do - let fn = "gb-18030-2000.xml" - str <- readFile fn - writeFile "Data/Encoding/GB18030Data.hs" $ createModuleFromFile fn str standardRanges standardRRanges - -standardRanges = - [('\x0000','\x0451') - ,('\x2010','\x2642') - ,('\x2E81','\x361A') - ,('\x3918','\x3CE0') - ,('\x4056','\x415F') - ,('\x4337','\x44D6') - ,('\x464C','\x478D') - ,('\x4947','\x49B7') - ,('\x4C77','\x9FA5') - ,('\xD800','\xE864') - ,('\xF92C','\xFA29') - ,('\xFE30','\xFFE5') - ] - -standardRRanges = - [( 0, 819) - ,( 7922, 9218) - ,(11329,12972) - ,(13738,14697) - ,(15583,15846) - ,(16318,16728) - ,(17102,17417) - ,(17859,17960) - ,(18664,19042) - ,(33469,33549) - ,(37845,38077) - ,(39108,39393) - ,(39420,188999)] - -createModuleFromFile :: String -> String -> [(Char,Char)] -> [(Int,Int)] -> String -createModuleFromFile name str = createModule (readDecodeTable name str) - -createModule :: [(Char,[Word8])] -> [(Char,Char)] -> [(Int,Int)] -> String -createModule mp ranges rranges = unlines $ - ["{-# LANGUAGE CPP,MagicHash #-}" - ,"module Data.Encoding.GB18030Data where" - ,"" - ,"import Data.ByteString(ByteString)" - ,"#if __GLASGOW_HASKELL__>=608" - ,"import Data.ByteString.Unsafe(unsafePackAddressLen)" - ,"#else" - ,"import Data.ByteString.Base(unsafePackAddressLen)" - ,"#endif" - ,"import System.IO.Unsafe(unsafePerformIO)"] - ++ (createAddrVars "arr" (map (uncurry $ createAddr mp) ranges)) - ++ (createAddrVars "rarr" (map (uncurry $ createRAddr4 mp) rranges)) - ++ (createAddrVar "rrarr" (createRAddr2 mp)) - -createAddrVars :: String -> [[Word8]] -> [String] -createAddrVars base conts = concatMap (\(n,cont) -> - createAddrVar (base++show n) cont) (zip [1..] conts) - -createAddrVar :: String -> [Word8] -> [String] -createAddrVar name cont = - ["" - ,name++" :: ByteString" - ,name++" = unsafePerformIO $ unsafePackAddressLen "++show (length cont)++" \""++addr cont++"\"#" - ] - -createAddr :: [(Char,[Word8])] -> Char -> Char -> [Word8] -createAddr mp f t = let - lst = sortBy (comparing fst) [el | el@(ch,_) <- mp, ch>=f, ch<=t] - in concatMap (\(ch,seq) -> let - l = length seq - in [fromIntegral l]++seq++(replicate (4-l) 0)) lst - -createRAddr2 :: [(Char,[Word8])] -> [Word8] -createRAddr2 mp = let - lst = sortBy (comparing snd) - [ (ch,v) | (ch,[w1,w2]) <- mp,let v = linear2 w1 w2] - in concatMap (\(ch,_) -> let i = ord ch - in [fromIntegral (i `shiftR` 8) - ,fromIntegral i]) lst - -createRAddr4 :: [(Char,[Word8])] -> Int -> Int -> [Word8] -createRAddr4 mp f t = let - lst = sortBy (comparing snd) - [ (ch,v) | (ch,[w1,w2,w3,w4]) <- mp, - let v = linear w1 w2 w3 w4, v>=f, v<=t ] - in concatMap (\(ch,_) -> let i = ord ch - in [fromIntegral (i `shiftR` 8) - ,fromIntegral i]) lst - -addr :: [Word8] -> String -addr = concatMap (\w -> "\\"++show w) diff --git a/Data/Encoding/Helper/Template.hs b/Data/Encoding/Helper/Template.hs deleted file mode 100644 index f179bb3..0000000 --- a/Data/Encoding/Helper/Template.hs +++ /dev/null @@ -1,120 +0,0 @@ -{-# LANGUAGE TemplateHaskell #-} -module Data.Encoding.Helper.Template where - -import Data.Encoding.Base -import Data.Bits -import Data.Char -import Data.Maybe (mapMaybe) -import Data.Map as Map (fromList,lookup) -import Data.Array.Unboxed -import Data.Typeable -import Language.Haskell.TH - -makeISOInstance :: String -> FilePath -> Q [Dec] -makeISOInstance name file = do - trans <- runIO (readTranslation 0 id file) - mp <- encodingMap (validTranslations trans) - arr <- decodingArray (fillTranslations 0 255 trans) - return $ encodingInstance 'encodeWithMap 'decodeWithArray 'encodeableWithMap name mp arr - -makeJISInstance :: Int -> String -> FilePath -> Q [Dec] -makeJISInstance offset name file = do - trans <- runIO (readTranslation offset (\src -> (src `shiftR` 8,src .&. 0xFF)) file) - mp <- encodingMap2 (validTranslations trans) - arr <- decodingArray2 (fillTranslations (0x21,0x21) (0x7E,0x7E) trans) - return $ encodingInstance 'encodeWithMap2 'decodeWithArray2 'encodeableWithMap name mp arr - -encodingInstance :: Name -> Name -> Name -> String -> Exp -> Exp -> [Dec] -encodingInstance enc dec able name mp arr - = [ DataD [] rname [] [NormalC rname []] [''Show,''Eq,''Typeable] - , ValD (VarP rmp) (NormalB mp) [] - , InstanceD [] (AppT (ConT ''Encoding) (ConT rname)) - [FunD 'encodeChar - [Clause [WildP] (NormalB $ AppE (VarE enc) (VarE rmp)) - [] - ] - ,FunD 'decodeChar - [Clause [WildP] (NormalB $ AppE (VarE dec) (VarE rarr)) - [ValD (VarP rarr) (NormalB arr) []] - ] - ,FunD 'encodeable - [Clause [WildP] (NormalB $ AppE (VarE able) (VarE rmp)) - [] - ] - ] - ] - where - rname = mkName name - rarr = mkName "arr" - rmp = mkName ("decoding_map_"++name) - -createCharArray :: [(Integer,Maybe Char)] -> Integer -> Integer -> Q Exp -createCharArray lst f t = createArray (map (\(x,y) -> - (LitE $ IntegerL x,mbCharToExp y) - ) lst) (LitE $ IntegerL f) (LitE $ IntegerL t) - -createCharArray2 :: [((Integer,Integer),Maybe Char)] -> (Integer,Integer) -> (Integer,Integer) -> Q Exp -createCharArray2 lst (f1,f2) (t1,t2) - = createArray (map (\((x1,x2),y) -> - (TupE [integerExp x1,integerExp x2],mbCharToExp y) - ) lst) - (TupE [integerExp f1,integerExp f2]) - (TupE [integerExp t1,integerExp t2]) - -integerExp :: Integer -> Exp -integerExp i = LitE $ IntegerL i - -mbCharToExp :: Maybe Char -> Exp -mbCharToExp Nothing = LitE (IntegerL (-1)) -mbCharToExp (Just c) = LitE (IntegerL $ fromIntegral $ ord c) - -createArray :: [(Exp,Exp)] -> Exp -> Exp -> Q Exp -createArray lst from to - = return $ AppE - (AppE - (VarE 'array) - (TupE [from,to])) - (ListE [TupE [x,y] | (x,y) <- lst]) - -decodingArray :: [(Integer,Maybe Char)] -> Q Exp -decodingArray trans = createCharArray trans 0 255 - -decodingArray2 :: [((Integer,Integer),Maybe Char)] -> Q Exp -decodingArray2 trans = createCharArray2 trans (0x21,0x21) (0x7E,0x7E) - -encodingMap :: [(Integer,Char)] -> Q Exp -encodingMap trans = return $ AppE - (VarE 'fromList) - (ListE [ TupE [LitE $ CharL to,LitE $ IntegerL from] - | (from,to) <- trans]) - -encodingMap2 :: [((Integer,Integer),Char)] -> Q Exp -encodingMap2 trans = return $ AppE - (VarE 'fromList) - (ListE [ TupE [LitE $ CharL to,TupE [integerExp f1,integerExp f2]] - | ((f1,f2),to) <- trans]) - -readTranslation :: Int -> (Integer -> a) -> FilePath -> IO [(a,Maybe Char)] -readTranslation offset f file = do - cont <- readFile file - return $ mapMaybe (\ln -> case drop offset ln of - [src] -> Just (f src,Nothing) - [src,trg] -> Just (f src,Just $ chr $ fromIntegral trg) - _ -> Nothing) (parseTranslationTable cont) - -parseTranslationTable :: String -> [[Integer]] -parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont)) - -fillTranslations :: (Ix a,Show a) => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)] -fillTranslations f t = merge (range (f,t)) - where - merge xs [] = map (\x -> (x,Nothing)) xs - merge [] cs = error $ "Data.Encoding.Helper.Template.fillTranslations: Character translations out of range: " ++ show cs - merge (x:xs) (y:ys) = if x < fst y - then (x,Nothing):(merge xs (y:ys)) - else y:(merge xs ys) - -validTranslations :: [(a,Maybe Char)] -> [(a,Char)] -validTranslations = mapMaybe (\(n,mc) -> case mc of - Nothing -> Nothing - Just c -> Just (n,c)) \ No newline at end of file diff --git a/Data/Encoding/Helper/XML.hs b/Data/Encoding/Helper/XML.hs deleted file mode 100644 index cdb1edf..0000000 --- a/Data/Encoding/Helper/XML.hs +++ /dev/null @@ -1,41 +0,0 @@ -module Data.Encoding.Helper.XML where - -import Data.Char -import Data.List (find) -import Data.Word -import Data.Maybe (mapMaybe) -import Text.XML.HaXml.Parse -import Text.XML.HaXml.Types -import Text.XML.HaXml.Verbatim -import Numeric - -readDecodeTable :: String -> String -> [(Char,[Word8])] -readDecodeTable name str = let - Document _ _ (Elem root_name _ conts) _ = xmlParse name str - in concat $ mapMaybe findAssignments conts - -findAssignments :: Content i -> Maybe [(Char,[Word8])] -findAssignments (CElem (Elem "assignments" _ conts) _) - = Just $ mapMaybe findAssignment conts -findAssignments _ = Nothing - -findAssignment :: Content i -> Maybe (Char,[Word8]) -findAssignment (CElem (Elem "a" attrs _) _) = do - u <- lookup "u" attrs - b <- lookup "b" attrs - return (chr $ readHexInt (showAttValue u),parseBinary b) -findAssignment _ = Nothing - -parseBinary :: AttValue -> [Word8] -parseBinary val = map (fromIntegral.readHexInt) (words (showAttValue val)) - -showAttValue :: AttValue -> String -showAttValue (AttValue lst) = concat $ map (\el -> case el of - Left str -> str - Right ref -> verbatim ref) lst - -readHexInt :: String -> Int -readHexInt str = case find (\x -> snd x == "") (readHex str) of - Nothing -> error "Not a hex" - Just (x,_) -> x -