From 6c5011aaa30bdf9eac3b85e2a548bb3b8dca7888 Mon Sep 17 00:00:00 2001 From: Henning Guenther Date: Sun, 20 Jan 2008 13:33:22 -0800 Subject: [PATCH] Added KOI8U encoding darcs-hash:20080120213322-a4fee-752cc3f3ec4e7a01e5bbf93bad08a88ca3cb3a5b --- Data/Encoding.hs | 2 ++ Data/Encoding/KOI8R.hs | 3 +- Data/Encoding/KOI8U.hs | 64 ++++++++++++++++++++++++++++++++++++++++++ encoding.cabal | 1 + 4 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 Data/Encoding/KOI8U.hs diff --git a/Data/Encoding.hs b/Data/Encoding.hs index 1b7a189..0962372 100644 --- a/Data/Encoding.hs +++ b/Data/Encoding.hs @@ -44,6 +44,7 @@ import Data.Encoding.CP1256 import Data.Encoding.CP1257 import Data.Encoding.CP1258 import Data.Encoding.KOI8R +import Data.Encoding.KOI8U import Data.Encoding.GB18030 -- | An untyped encoding. Used in 'System.IO.Encoding.getSystemEncoding'. @@ -80,6 +81,7 @@ encodingFromStringMaybe "UTF-8" = Just $ DynEncoding UTF8 encodingFromStringMaybe "UTF-16" = Just $ DynEncoding UTF16 encodingFromStringMaybe "UTF-32" = Just $ DynEncoding UTF32 encodingFromStringMaybe "KOI8-R" = Just $ DynEncoding KOI8R +encodingFromStringMaybe "KOI8-U" = Just $ DynEncoding KOI8U encodingFromStringMaybe "ISO-8859-1" = Just $ DynEncoding ISO88591 encodingFromStringMaybe "ISO-8859-2" = Just $ DynEncoding ISO88592 encodingFromStringMaybe "ISO-8859-3" = Just $ DynEncoding ISO88593 diff --git a/Data/Encoding/KOI8R.hs b/Data/Encoding/KOI8R.hs index 79c0b67..1a0ab39 100644 --- a/Data/Encoding/KOI8R.hs +++ b/Data/Encoding/KOI8R.hs @@ -41,7 +41,8 @@ koi8rList = ,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413' ,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e' ,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412' - ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'] + ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a' + ] koi8rDecode :: Word8 -> Char koi8rDecode ch diff --git 
a/Data/Encoding/KOI8U.hs b/Data/Encoding/KOI8U.hs
new file mode 100644
index 0000000..fb4970d
--- /dev/null
+++ b/Data/Encoding/KOI8U.hs
@@ -0,0 +1,75 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{- | This module implements the KOI8-U encoding, which covers the Ukrainian alphabet.
+     See RFC 2319 (Ukrainian Character Set KOI8-U) for more information.
+ -}
+module Data.Encoding.KOI8U
+    (KOI8U(..)) where
+
+import Control.Exception (throwDyn)
+import Data.Word
+import Data.Array.Unboxed
+import Data.Encoding.Base
+import Data.Char (chr,ord)
+import Data.Map (Map,fromList,lookup,member)
+import qualified Data.ByteString.Lazy as Lazy
+import Prelude hiding (lookup)
+import Data.Typeable
+
+-- | Tag type for KOI8-U; its only purpose here is to carry the 'Encoding' instance.
+data KOI8U = KOI8U deriving (Eq,Show,Typeable)
+
+-- | Decoding table for the bytes 128..255, filled from 'koi8uList'.
+koi8uArr :: UArray Word8 Char
+koi8uArr = listArray (128,255) koi8uList
+
+-- | Encoding table: maps every character of 'koi8uList' to its byte.
+--   Numbering starts at 128 so that this map is the inverse of 'koi8uArr'.
+koi8uMap :: Map Char Word8
+koi8uMap = fromList (zip koi8uList [128..])
+
+-- | The characters assigned to the bytes 128..255, in byte order (8 per row).
+koi8uList :: [Char]
+koi8uList =
+    ['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
+    ,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
+    ,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
+    ,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
+    ,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
+    ,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
+    ,'\x255f','\x2560','\x2561','\x0401','\x0404','\x2563','\x0406','\x0407' -- 0xB4 is U+0404 (capital Ukrainian IE), per RFC 2319
+    ,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
+    ,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
+    ,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
+    ,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
+    ,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
+    ,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
+    ,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
+    ,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
+    ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
+    ]
+
+-- | Decode a single byte: bytes below 128 become the character with the
+--   same code point, all other bytes are looked up in 'koi8uArr'.
+koi8uDecode :: Word8 -> Char
+koi8uDecode ch
+    | ch < 128  = chr $ fromIntegral ch
+    | otherwise = koi8uArr!ch
+
+-- | Encode a single character: characters below code point 128 become the
+--   byte with the same value, all others are looked up in 'koi8uMap'.
+--   Throws 'HasNoRepresentation' (via 'throwDyn') if the character is not in the table.
+koi8uEncode :: Char -> Word8
+koi8uEncode ch
+    | ch < '\128' = fromIntegral $ ord ch
+    | otherwise   = case lookup ch koi8uMap of
+        Just w  -> w
+        Nothing -> throwDyn (HasNoRepresentation ch)
+
+-- Every byte has a decoding (see 'koi8uDecode'), hence 'decodable' is constantly True.
+instance Encoding KOI8U where
+    encode _ = encodeSinglebyte koi8uEncode
+    encodeLazy _ = encodeSinglebyteLazy koi8uEncode
+    encodable _ c = (c < '\128') || (member c koi8uMap)
+    decode _ = decodeSinglebyte koi8uDecode
+    decodeLazy _ str = concatMap (decodeSinglebyte koi8uDecode) (Lazy.toChunks str)
+    decodable _ = const True
diff --git a/encoding.cabal b/encoding.cabal index 736395b..6758311 100644 --- a/encoding.cabal +++ b/encoding.cabal @@ -83,6 +83,7 @@ Library Data.Encoding.CP1257 Data.Encoding.CP1258 Data.Encoding.KOI8R + Data.Encoding.KOI8U Data.Encoding.GB18030 System.IO.Encoding Other-Modules: