From 39af34b0a3431e84dac4cb89fadfaccb58d64ebf Mon Sep 17 00:00:00 2001 From: Henning Guenther Date: Wed, 12 Aug 2009 19:41:09 -0700 Subject: [PATCH] Added Preprocessor for XML mappings and normal mappings Ignore-this: dc0902f526ceb99db528e14c9e3ad563 darcs-hash:20090813024109-a4fee-447c0ff194c227ed919d6eef0f7824e63276183e --- Data/Encoding/Preprocessor/Mapping.hs | 141 +++++ Data/Encoding/Preprocessor/XMLMapping.hs | 581 ++++++++++++++++++ .../Preprocessor/XMLMappingBuilder.hs | 315 ++++++++++ Setup.hs | 11 +- 4 files changed, 1046 insertions(+), 2 deletions(-) create mode 100644 Data/Encoding/Preprocessor/Mapping.hs create mode 100644 Data/Encoding/Preprocessor/XMLMapping.hs create mode 100644 Data/Encoding/Preprocessor/XMLMappingBuilder.hs diff --git a/Data/Encoding/Preprocessor/Mapping.hs b/Data/Encoding/Preprocessor/Mapping.hs new file mode 100644 index 0000000..0c1ae0e --- /dev/null +++ b/Data/Encoding/Preprocessor/Mapping.hs @@ -0,0 +1,141 @@ +module Data.Encoding.Preprocessor.Mapping where + +import Distribution.Simple.PreProcess +import Distribution.Simple.Utils +import System.IO +import System.FilePath +import Data.List (intersperse,unfoldr) +import Data.Maybe +import Data.Char +import Data.Word +import Data.Ix +import Data.Bits +import Data.Encoding.Preprocessor.Addr +import Data.Array.Static.Builder +import Data.CharMap.Builder + +data MappingType + = ISOMapping + | JISMapping + deriving (Eq,Ord,Show,Read) + +readTranslation :: Int -> FilePath -> IO [(Integer,Maybe Char)] +readTranslation offset file = do + cont <- readFile file + return $ mapMaybe (\ln -> case drop offset ln of + [src] -> Just (src,Nothing) + [src,trg] -> Just (src,Just $ chr $ fromIntegral trg) + _ -> Nothing) (parseTranslationTable cont) + +parseTranslationTable :: String -> [[Integer]] +parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont)) + +{-fillTranslations :: (Ix a,Show a) => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)] +fillTranslations f t = merge (range (f,t)) + where + merge xs [] = map (\x -> (x,Nothing)) xs + merge [] cs = error $ "Data.Encoding.Helper.Template.fillTranslations: Character translations out of range: " ++ show cs + merge (x:xs) (y:ys) = if x < fst y + then (x,Nothing):(merge xs (y:ys)) + else y:(merge xs ys)-} + +fillTranslations :: (Enum a,Eq a) => [(a,Maybe Char)] -> (a,a,[Maybe Char]) +fillTranslations [] = error "fillTranslations: zero elements" +fillTranslations ((s,c):rest) = let (e,r) = fill' s rest + fill' cur [] = (cur,[]) + fill' cur all@((n,c):rest2) = if succ cur == n + then (let (e',res) = fill' n rest2 + in (e',c:res)) + else (let (e',res) = fill' (succ cur) all + in (e',Nothing:res)) + in (s,e,c:r) + +validTranslations :: [(a,Maybe Char)] -> [(a,Char)] +validTranslations = mapMaybe (\(n,mc) -> case mc of + Nothing -> Nothing + Just c -> Just (n,c)) + +mappingPreprocessor :: PreProcessor +mappingPreprocessor = PreProcessor + {platformIndependent = True + ,runPreProcessor = \(sbase,sfile) (tbase,tfile) verb -> do + let (dir,fn) = splitFileName sfile + let (bname,ext) = splitExtensions fn + let dirs = splitDirectories dir + let tp = case ext of + ".mapping" -> ISOMapping + ".mapping2" -> JISMapping + info verb (tfile++" generated from mapping "++sfile) + preprocessMapping tp (sbase sfile) (tbase tfile) dirs bname + } + +preprocessMapping :: MappingType -> FilePath -> FilePath -> [String] -> String -> IO () +preprocessMapping tp src trg mods name = do + trans <- readTranslation 0 src + let mod = concat $ intersperse "." (mods++[name]) + let wsize = case tp of + ISOMapping -> 1 + JISMapping -> 2 + let bsize = show (wsize*8) ++ (if wsize > 1 then "be" else "") + + --let (larr,off,arr) = staticArray32 trans + let (sarr,earr,els) = fillTranslations trans + {-let (lmp,idx,val) = staticMap wsize trans-} + let arrname = "decoding_array_"++name + let mpname = "encoding_map_"++name + let bcheck exp = if sarr/=0 || earr/=255 + then ("(if "++ + concat (intersperse "||" $ (if sarr/=0 then ["w<"++show sarr] else [])++(if earr/=255 then ["w>"++show earr] else []))++ + " then throwException $ IllegalCharacter $ fromIntegral w else "++exp++")" + ) else exp + let mp = buildCharMap (mapMaybe (\(i,c) -> do + rc <- c + return $ SingleMapping + rc + (reverse $ unfoldr (\(w,n) -> if n == 0 + then Nothing + else Just (fromIntegral w,(w `shiftR` 8,n-1))) (i,wsize)) + ) trans + ) + {-let mp = case wsize of + 1 -> buildStaticMap (mapMaybe (\(i,c) -> case c of + Nothing -> Nothing + Just rc -> Just (rc,fromIntegral i::Word8)) trans) + 2 -> buildStaticMap (mapMaybe (\(i,c) -> case c of + Nothing -> Nothing + Just rc -> Just (rc,fromIntegral i::Word16)) trans)-} + writeFile trg $ unlines $ + ["{-# LANGUAGE MagicHash,DeriveDataTypeable #-}" + ,"module "++mod++"("++name++"(..)) where" + ,"" + ,"import Data.Encoding.Base" + ,"import Data.Encoding.ByteSource" + ,"import Data.Encoding.ByteSink" + ,"import Data.Encoding.Exception" + ,"import Data.CharMap" + ,"import Data.Array.Static" + ,"import Data.Map.Static" + ,"import Control.Throws" + ,"import Prelude hiding (lookup)" + ,"import Data.Word" + ,"" + ,"import Data.Typeable" + ,"" + ,"data "++name++" = "++name + ," deriving (Show,Eq,Typeable)" + ,"" + ,arrname++" = "++buildStaticArray (sarr,earr) els + ,"" + ,mpname++" :: CharMap" + ,mpname++" = "++mp + ,"" + ,"instance Encoding "++name++" where" + ," decodeChar _ = do" + ," w <- fetchWord"++bsize + ," "++bcheck "return ()" + ," case "++arrname++"!w of" + ," Nothing -> throwException $ IllegalCharacter $ fromIntegral w" + ," Just c -> return c" + ," encodeChar _ c = mapEncode c "++mpname + ," encodeable _ c = mapMember c "++mpname + ] \ No newline at end of file diff --git a/Data/Encoding/Preprocessor/XMLMapping.hs b/Data/Encoding/Preprocessor/XMLMapping.hs new file mode 100644 index 0000000..168bfc1 --- /dev/null +++ b/Data/Encoding/Preprocessor/XMLMapping.hs @@ -0,0 +1,581 @@ +module Data.Encoding.Preprocessor.XMLMapping where + +import Data.Word +import Text.Read +import Text.Show +import Numeric +import Data.List (find) +import Data.Char +import Text.XML.HaXml.XmlContent +import Text.XML.HaXml.OneOfN + +testFile :: FilePath -> IO CharacterMapping +testFile fp = fReadXml fp + +{-Type decls-} + +data CharacterMapping = CharacterMapping CharacterMapping_Attrs + (Maybe History) + (OneOf2 Validity Stateful_siso) + Assignments + deriving (Eq,Show) +data CharacterMapping_Attrs = CharacterMapping_Attrs + { characterMappingId :: String + , characterMappingVersion :: String + , characterMappingDescription :: (Maybe String) + , characterMappingContact :: (Maybe String) + , characterMappingRegistrationAuthority :: (Maybe String) + , characterMappingRegistrationName :: (Maybe String) + , characterMappingCopyright :: (Maybe String) + , characterMappingBidiOrder :: (Defaultable CharacterMapping_bidiOrder) + , characterMappingCombiningOrder :: (Defaultable CharacterMapping_combiningOrder) + , characterMappingNormalization :: (Defaultable CharacterMapping_normalization) + } deriving (Eq,Show) +data CharacterMapping_bidiOrder = CharacterMapping_bidiOrder_logical + | CharacterMapping_bidiOrder_RTL | + CharacterMapping_bidiOrder_LTR + deriving (Eq,Show) +data CharacterMapping_combiningOrder = CharacterMapping_combiningOrder_before + | CharacterMapping_combiningOrder_after + deriving (Eq,Show) +data CharacterMapping_normalization = CharacterMapping_normalization_undetermined + | CharacterMapping_normalization_neither | + CharacterMapping_normalization_NFC | + CharacterMapping_normalization_NFD | + CharacterMapping_normalization_NFC_NFD + deriving (Eq,Show) +data Stateful_siso = Stateful_siso Validity Validity + deriving (Eq,Show) +newtype History = History (List1 Modified) deriving (Eq,Show) +data Modified = Modified Modified_Attrs String + deriving (Eq,Show) +data Modified_Attrs = Modified_Attrs + { modifiedVersion :: String + , modifiedDate :: String + } deriving (Eq,Show) +newtype Validity = Validity (List1 State) deriving (Eq,Show) +data State = State + { stateType :: String + , stateNext :: String + , stateS :: ByteSequence + , stateE :: (Maybe ByteSequence) + , stateMax :: (Maybe String) + } deriving (Eq,Show) +data Assignments = Assignments Assignments_Attrs [A] [Fub] [Fbu] + [Sub1] [Range] + deriving (Eq,Show) +data Assignments_Attrs = Assignments_Attrs + { assignmentsSub :: (Defaultable String) + , assignmentsSub1 :: (Maybe String) + } deriving (Eq,Show) +data A = A + { aB :: ByteSequence + , aU :: CodePoints + , aC :: (Maybe String) + , aV :: (Maybe String) + } deriving (Eq,Show) +data Fub = Fub + { fubB :: ByteSequence + , fubU :: CodePoints + , fubC :: (Maybe String) + , fubRu :: (Maybe String) + , fubRc :: (Maybe String) + , fubV :: (Maybe String) + } deriving (Eq,Show) +data Fbu = Fbu + { fbuB :: ByteSequence + , fbuU :: CodePoints + , fbuV :: (Maybe String) + } deriving (Eq,Show) +data Sub1 = Sub1 + { sub1U :: CodePoints + , sub1C :: (Maybe String) + , sub1V :: (Maybe String) + } deriving (Eq,Show) +data Range = Range + { rangeBFirst :: ByteSequence + , rangeBLast :: ByteSequence + , rangeUFirst :: CodePoints + , rangeULast :: CodePoints + , rangeBMin :: ByteSequence + , rangeBMax :: ByteSequence + , rangeV :: (Maybe String) + } deriving (Eq,Show) +data Iso2022 = Iso2022 (Maybe Default2022) + (List1 (OneOf5 Escape Si So Ss2 Ss3)) + deriving (Eq,Show) +data Default2022 = Default2022 + { default2022Name :: String + } deriving (Eq,Show) +data Escape = Escape + { escapeSequence :: String + , escapeName :: String + } deriving (Eq,Show) +newtype Si = Si (List1 Designator) deriving (Eq,Show) +newtype So = So (List1 Designator) deriving (Eq,Show) +newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show) +newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show) +data Designator = Designator + { designatorSequence :: String + , designatorName :: String + } deriving (Eq,Show) + +newtype ByteSequence = BS [Word8] deriving Eq + +newtype CodePoints = CP [Char] deriving Eq + +{-Instance decls-} + +instance HTypeable CharacterMapping where + toHType x = Defined "characterMapping" [] [] +instance XmlContent CharacterMapping where + toContents (CharacterMapping as a b c) = + [CElem (Elem "characterMapping" (toAttrs as) (maybe [] toContents a + ++ toContents b + ++ toContents c)) ()] + parseContents = do + { e@(Elem _ as _) <- element ["characterMapping"] + ; interior e $ return (CharacterMapping (fromAttrs as)) + `apply` optional parseContents `apply` parseContents `apply` parseContents + } `adjustErr` ("in , "++) +instance XmlAttributes CharacterMapping_Attrs where + fromAttrs as = + CharacterMapping_Attrs + { characterMappingId = definiteA fromAttrToStr "characterMapping" "id" as + , characterMappingVersion = definiteA fromAttrToStr "characterMapping" "version" as + , characterMappingDescription = possibleA fromAttrToStr "description" as + , characterMappingContact = possibleA fromAttrToStr "contact" as + , characterMappingRegistrationAuthority = possibleA fromAttrToStr "registrationAuthority" as + , characterMappingRegistrationName = possibleA fromAttrToStr "registrationName" as + , characterMappingCopyright = possibleA fromAttrToStr "copyright" as + , characterMappingBidiOrder = defaultA fromAttrToTyp CharacterMapping_bidiOrder_logical "bidiOrder" as + , characterMappingCombiningOrder = defaultA fromAttrToTyp CharacterMapping_combiningOrder_after "combiningOrder" as + , characterMappingNormalization = defaultA fromAttrToTyp CharacterMapping_normalization_undetermined "normalization" as + } + toAttrs v = catMaybes + [ toAttrFrStr "id" (characterMappingId v) + , toAttrFrStr "version" (characterMappingVersion v) + , maybeToAttr toAttrFrStr "description" (characterMappingDescription v) + , maybeToAttr toAttrFrStr "contact" (characterMappingContact v) + , maybeToAttr toAttrFrStr "registrationAuthority" (characterMappingRegistrationAuthority v) + , maybeToAttr toAttrFrStr "registrationName" (characterMappingRegistrationName v) + , maybeToAttr toAttrFrStr "copyright" (characterMappingCopyright v) + , defaultToAttr toAttrFrTyp "bidiOrder" (characterMappingBidiOrder v) + , defaultToAttr toAttrFrTyp "combiningOrder" (characterMappingCombiningOrder v) + , defaultToAttr toAttrFrTyp "normalization" (characterMappingNormalization v) + ] + +instance XmlAttrType CharacterMapping_bidiOrder where + fromAttrToTyp n (n',v) + | n==n' = translate (attr2str v) + | otherwise = Nothing + where translate "logical" = Just CharacterMapping_bidiOrder_logical + translate "RTL" = Just CharacterMapping_bidiOrder_RTL + translate "LTR" = Just CharacterMapping_bidiOrder_LTR + translate _ = Nothing + toAttrFrTyp n CharacterMapping_bidiOrder_logical = Just (n, str2attr "logical") + toAttrFrTyp n CharacterMapping_bidiOrder_RTL = Just (n, str2attr "RTL") + toAttrFrTyp n CharacterMapping_bidiOrder_LTR = Just (n, str2attr "LTR") + +instance XmlAttrType CharacterMapping_combiningOrder where + fromAttrToTyp n (n',v) + | n==n' = translate (attr2str v) + | otherwise = Nothing + where translate "before" = Just CharacterMapping_combiningOrder_before + translate "after" = Just CharacterMapping_combiningOrder_after + translate _ = Nothing + toAttrFrTyp n CharacterMapping_combiningOrder_before = Just (n, str2attr "before") + toAttrFrTyp n CharacterMapping_combiningOrder_after = Just (n, str2attr "after") + +instance XmlAttrType CharacterMapping_normalization where + fromAttrToTyp n (n',v) + | n==n' = translate (attr2str v) + | otherwise = Nothing + where translate "undetermined" = Just CharacterMapping_normalization_undetermined + translate "neither" = Just CharacterMapping_normalization_neither + translate "NFC" = Just CharacterMapping_normalization_NFC + translate "NFD" = Just CharacterMapping_normalization_NFD + translate "NFC_NFD" = Just CharacterMapping_normalization_NFC_NFD + translate _ = Nothing + toAttrFrTyp n CharacterMapping_normalization_undetermined = Just (n, str2attr "undetermined") + toAttrFrTyp n CharacterMapping_normalization_neither = Just (n, str2attr "neither") + toAttrFrTyp n CharacterMapping_normalization_NFC = Just (n, str2attr "NFC") + toAttrFrTyp n CharacterMapping_normalization_NFD = Just (n, str2attr "NFD") + toAttrFrTyp n CharacterMapping_normalization_NFC_NFD = Just (n, str2attr "NFC_NFD") + +instance XmlAttrType ByteSequence where + fromAttrToTyp n (n',v) + | n==n' = parseByteSequence (attr2str v) + | otherwise = Nothing + toAttrFrTyp n bs = Just (n, str2attr $ show bs) + +parseByteSequence :: String -> Maybe ByteSequence +parseByteSequence str = do + seq <- mapM (\w -> do + (res,_) <- find (null.snd) (readHex w) + return res + ) (words str) + return $ BS seq + +instance Show ByteSequence where + show (BS seq) = foldl (\f w -> f . (showChar ' ') . (showHex w)) id seq "" + +instance XmlAttrType CodePoints where + fromAttrToTyp n (n',v) + | n==n' = parseCodePoints (attr2str v) + | otherwise = Nothing + toAttrFrTyp n bs = Just (n, str2attr $ show bs) + +parseCodePoints :: String -> Maybe CodePoints +parseCodePoints str = do + seq <- mapM (\w -> do + (res,_) <- find (null.snd) (readHex w) + return (chr res) + ) (words str) + return $ CP seq + +instance Show CodePoints where + show (CP seq) = foldl (\f w -> f . (showChar ' ') . (showHex (ord w))) id seq "" + +instance HTypeable Stateful_siso where + toHType x = Defined "stateful_siso" [] [] +instance XmlContent Stateful_siso where + toContents (Stateful_siso a b) = + [CElem (Elem "stateful_siso" [] (toContents a ++ toContents b)) ()] + parseContents = do + { e@(Elem _ [] _) <- element ["stateful_siso"] + ; interior e $ return (Stateful_siso) `apply` parseContents + `apply` parseContents + } `adjustErr` ("in , "++) + +instance HTypeable History where + toHType x = Defined "history" [] [] +instance XmlContent History where + toContents (History a) = + [CElem (Elem "history" [] (toContents a)) ()] + parseContents = do + { e@(Elem _ [] _) <- element ["history"] + ; interior e $ return (History) `apply` parseContents + } `adjustErr` ("in , "++) + +instance HTypeable Modified where + toHType x = Defined "modified" [] [] +instance XmlContent Modified where + toContents (Modified as a) = + [CElem (Elem "modified" (toAttrs as) (toText a)) ()] + parseContents = do + { e@(Elem _ as _) <- element ["modified"] + ; interior e $ return (Modified (fromAttrs as)) + `apply` (text `onFail` return "") + } `adjustErr` ("in , "++) +instance XmlAttributes Modified_Attrs where + fromAttrs as = + Modified_Attrs + { modifiedVersion = definiteA fromAttrToStr "modified" "version" as + , modifiedDate = definiteA fromAttrToStr "modified" "date" as + } + toAttrs v = catMaybes + [ toAttrFrStr "version" (modifiedVersion v) + , toAttrFrStr "date" (modifiedDate v) + ] + +instance HTypeable Validity where + toHType x = Defined "validity" [] [] +instance XmlContent Validity where + toContents (Validity a) = + [CElem (Elem "validity" [] (toContents a)) ()] + parseContents = do + { e@(Elem _ [] _) <- element ["validity"] + ; interior e $ return (Validity) `apply` parseContents + } `adjustErr` ("in , "++) + +instance HTypeable State where + toHType x = Defined "state" [] [] +instance XmlContent State where + toContents as = + [CElem (Elem "state" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["state"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes State where + fromAttrs as = + State + { stateType = definiteA fromAttrToStr "state" "type" as + , stateNext = definiteA fromAttrToStr "state" "next" as + , stateS = definiteA fromAttrToTyp "state" "s" as + , stateE = possibleA fromAttrToTyp "e" as + , stateMax = possibleA fromAttrToStr "max" as + } + toAttrs v = catMaybes + [ toAttrFrStr "type" (stateType v) + , toAttrFrStr "next" (stateNext v) + , toAttrFrTyp "s" (stateS v) + , maybeToAttr toAttrFrTyp "e" (stateE v) + , maybeToAttr toAttrFrStr "max" (stateMax v) + ] + +instance HTypeable Assignments where + toHType x = Defined "assignments" [] [] +instance XmlContent Assignments where + toContents (Assignments as a b c d e) = + [CElem (Elem "assignments" (toAttrs as) (concatMap toContents a ++ + concatMap toContents b ++ concatMap toContents c ++ + concatMap toContents d ++ + concatMap toContents e)) ()] + parseContents = do + { e@(Elem _ as _) <- element ["assignments"] + ; interior e $ return (Assignments (fromAttrs as)) + `apply` many parseContents `apply` many parseContents + `apply` many parseContents `apply` many parseContents + `apply` many parseContents + } `adjustErr` ("in , "++) +instance XmlAttributes Assignments_Attrs where + fromAttrs as = + Assignments_Attrs + { assignmentsSub = defaultA fromAttrToStr "1A" "sub" as + , assignmentsSub1 = possibleA fromAttrToStr "sub1" as + } + toAttrs v = catMaybes + [ defaultToAttr toAttrFrStr "sub" (assignmentsSub v) + , maybeToAttr toAttrFrStr "sub1" (assignmentsSub1 v) + ] + +instance HTypeable A where + toHType x = Defined "a" [] [] +instance XmlContent A where + toContents as = + [CElem (Elem "a" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["a"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes A where + fromAttrs as = + A { aB = definiteA fromAttrToTyp "a" "b" as + , aU = definiteA fromAttrToTyp "a" "u" as + , aC = possibleA fromAttrToStr "c" as + , aV = possibleA fromAttrToStr "v" as + } + toAttrs v = catMaybes + [ toAttrFrTyp "b" (aB v) + , toAttrFrTyp "u" (aU v) + , maybeToAttr toAttrFrStr "c" (aC v) + , maybeToAttr toAttrFrStr "v" (aV v) + ] + +instance HTypeable Fub where + toHType x = Defined "fub" [] [] +instance XmlContent Fub where + toContents as = + [CElem (Elem "fub" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["fub"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes Fub where + fromAttrs as = + Fub + { fubB = definiteA fromAttrToTyp "fub" "b" as + , fubU = definiteA fromAttrToTyp "fub" "u" as + , fubC = possibleA fromAttrToStr "c" as + , fubRu = possibleA fromAttrToStr "ru" as + , fubRc = possibleA fromAttrToStr "rc" as + , fubV = possibleA fromAttrToStr "v" as + } + toAttrs v = catMaybes + [ toAttrFrTyp "b" (fubB v) + , toAttrFrTyp "u" (fubU v) + , maybeToAttr toAttrFrStr "c" (fubC v) + , maybeToAttr toAttrFrStr "ru" (fubRu v) + , maybeToAttr toAttrFrStr "rc" (fubRc v) + , maybeToAttr toAttrFrStr "v" (fubV v) + ] + +instance HTypeable Fbu where + toHType x = Defined "fbu" [] [] +instance XmlContent Fbu where + toContents as = + [CElem (Elem "fbu" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["fbu"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes Fbu where + fromAttrs as = + Fbu + { fbuB = definiteA fromAttrToTyp "fbu" "b" as + , fbuU = definiteA fromAttrToTyp "fbu" "u" as + , fbuV = possibleA fromAttrToStr "v" as + } + toAttrs v = catMaybes + [ toAttrFrTyp "b" (fbuB v) + , toAttrFrTyp "u" (fbuU v) + , maybeToAttr toAttrFrStr "v" (fbuV v) + ] + +instance HTypeable Sub1 where + toHType x = Defined "sub1" [] [] +instance XmlContent Sub1 where + toContents as = + [CElem (Elem "sub1" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["sub1"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes Sub1 where + fromAttrs as = + Sub1 + { sub1U = definiteA fromAttrToTyp "sub1" "u" as + , sub1C = possibleA fromAttrToStr "c" as + , sub1V = possibleA fromAttrToStr "v" as + } + toAttrs v = catMaybes + [ toAttrFrTyp "u" (sub1U v) + , maybeToAttr toAttrFrStr "c" (sub1C v) + , maybeToAttr toAttrFrStr "v" (sub1V v) + ] + +instance HTypeable Range where + toHType x = Defined "range" [] [] +instance XmlContent Range where + toContents as = + [CElem (Elem "range" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["range"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes Range where + fromAttrs as = + Range + { rangeBFirst = definiteA fromAttrToTyp "range" "bFirst" as + , rangeBLast = definiteA fromAttrToTyp "range" "bLast" as + , rangeUFirst = definiteA fromAttrToTyp "range" "uFirst" as + , rangeULast = definiteA fromAttrToTyp "range" "uLast" as + , rangeBMin = definiteA fromAttrToTyp "range" "bMin" as + , rangeBMax = definiteA fromAttrToTyp "range" "bMax" as + , rangeV = possibleA fromAttrToStr "v" as + } + toAttrs v = catMaybes + [ toAttrFrTyp "bFirst" (rangeBFirst v) + , toAttrFrTyp "bLast" (rangeBLast v) + , toAttrFrTyp "uFirst" (rangeUFirst v) + , toAttrFrTyp "uLast" (rangeULast v) + , toAttrFrTyp "bMin" (rangeBMin v) + , toAttrFrTyp "bMax" (rangeBMax v) + , maybeToAttr toAttrFrStr "v" (rangeV v) + ] + +instance HTypeable Iso2022 where + toHType x = Defined "iso2022" [] [] +instance XmlContent Iso2022 where + toContents (Iso2022 a b) = + [CElem (Elem "iso2022" [] (maybe [] toContents a ++ + toContents b)) ()] + parseContents = do + { e@(Elem _ [] _) <- element ["iso2022"] + ; interior e $ return (Iso2022) `apply` optional parseContents + `apply` parseContents + } `adjustErr` ("in , "++) + +instance HTypeable Default2022 where + toHType x = Defined "default2022" [] [] +instance XmlContent Default2022 where + toContents as = + [CElem (Elem "default2022" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["default2022"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes Default2022 where + fromAttrs as = + Default2022 + { default2022Name = definiteA fromAttrToStr "default2022" "name" as + } + toAttrs v = catMaybes + [ toAttrFrStr "name" (default2022Name v) + ] + +instance HTypeable Escape where + toHType x = Defined "escape" [] [] +instance XmlContent Escape where + toContents as = + [CElem (Elem "escape" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["escape"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes Escape where + fromAttrs as = + Escape + { escapeSequence = definiteA fromAttrToStr "escape" "sequence" as + , escapeName = definiteA fromAttrToStr "escape" "name" as + } + toAttrs v = catMaybes + [ toAttrFrStr "sequence" (escapeSequence v) + , toAttrFrStr "name" (escapeName v) + ] + +instance HTypeable Si where + toHType x = Defined "si" [] [] +instance XmlContent Si where + toContents (Si a) = + [CElem (Elem "si" [] (toContents a)) ()] + parseContents = do + { e@(Elem _ [] _) <- element ["si"] + ; interior e $ return (Si) `apply` parseContents + } `adjustErr` ("in , "++) + +instance HTypeable So where + toHType x = Defined "so" [] [] +instance XmlContent So where + toContents (So a) = + [CElem (Elem "so" [] (toContents a)) ()] + parseContents = do + { e@(Elem _ [] _) <- element ["so"] + ; interior e $ return (So) `apply` parseContents + } `adjustErr` ("in , "++) + +instance HTypeable Ss2 where + toHType x = Defined "ss2" [] [] +instance XmlContent Ss2 where + toContents (Ss2 a) = + [CElem (Elem "ss2" [] (toContents a)) ()] + parseContents = do + { e@(Elem _ [] _) <- element ["ss2"] + ; interior e $ return (Ss2) `apply` parseContents + } `adjustErr` ("in , "++) + +instance HTypeable Ss3 where + toHType x = Defined "ss3" [] [] +instance XmlContent Ss3 where + toContents (Ss3 a) = + [CElem (Elem "ss3" [] (toContents a)) ()] + parseContents = do + { e@(Elem _ [] _) <- element ["ss3"] + ; interior e $ return (Ss3) `apply` parseContents + } `adjustErr` ("in , "++) + +instance HTypeable Designator where + toHType x = Defined "designator" [] [] +instance XmlContent Designator where + toContents as = + [CElem (Elem "designator" (toAttrs as) []) ()] + parseContents = do + { (Elem _ as []) <- element ["designator"] + ; return (fromAttrs as) + } `adjustErr` ("in , "++) +instance XmlAttributes Designator where + fromAttrs as = + Designator + { designatorSequence = definiteA fromAttrToStr "designator" "sequence" as + , designatorName = definiteA fromAttrToStr "designator" "name" as + } + toAttrs v = catMaybes + [ toAttrFrStr "sequence" (designatorSequence v) + , toAttrFrStr "name" (designatorName v) + ] + + + +{-Done-} diff --git a/Data/Encoding/Preprocessor/XMLMappingBuilder.hs b/Data/Encoding/Preprocessor/XMLMappingBuilder.hs new file mode 100644 index 0000000..31d27df --- /dev/null +++ b/Data/Encoding/Preprocessor/XMLMappingBuilder.hs @@ -0,0 +1,315 @@ +{-# LANGUAGE ParallelListComp #-} +module Data.Encoding.Preprocessor.XMLMappingBuilder where + +import Data.Word +import Data.List +import Data.Ord +import Data.Char +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Array.Static.Builder +import Data.CharMap.Builder +import Data.Encoding.Preprocessor.XMLMapping +import Distribution.Simple.PreProcess +import System.FilePath +import Text.XML.HaXml.OneOfN +import Text.XML.HaXml.XmlContent +import Control.Exception (assert) + +xmlPreprocessor :: PreProcessor +xmlPreprocessor = PreProcessor + { platformIndependent = True + , runPreProcessor = \src trg verb -> do + createModuleFromFile src trg + } + +description :: CharacterMapping -> Maybe String +description (CharacterMapping attrs _ _ _) = characterMappingDescription attrs + +createModuleFromFile (sbase,sfile) (tbase,tfile) = do + xml <- testFile (sbase sfile) + let (dir,fn) = splitFileName sfile + let (bname,ext) = splitExtensions fn + let dirs = splitDirectories dir + let body = buildDecisionTree minBound maxBound "ch" (encodingElements xml) + let body2 = createDecoding (states xml) (decodingElements xml) + let mpname = "encoding_map_"++bname + let mp = buildCharMap $ [SingleMapping c w | (c,w) <- assignments xml] + ++[ RangeMapping + st end + (foldl (\v (w,mi,ma) -> v*((fromIntegral $ ma-mi)+1) + (fromIntegral (w-mi))) 0 (zip3 bfirst bmin bmax)) + [(min,max-min+1) | min <- bmin | max <- bmax] + | (st,end,bfirst,blast,bmin,bmax) <- ranges xml ] + writeFile (tbasetfile) $ unlines $ + ["{-# LANGUAGE MagicHash,DeriveDataTypeable #-}"]++ + (case description xml of + Nothing -> [] + Just str -> ["{- | "++str++" -}"]) ++ + ["module "++concat (intersperse "." (dirs++[bname])) + ," ("++bname++"("++bname++"))" + ," where" + ,"" + ,"import Control.Throws" + ,"import Data.Encoding.Base" + ,"import Data.Encoding.ByteSink" + ,"import Data.Encoding.ByteSource" + ,"import Data.Encoding.Exception" + ,"import Data.Array.Static" + ,"import Data.Map.Static" + ,"import Data.CharMap" + ,"import Data.Char" + ,"import Data.Word" + ,"import Data.Typeable" + ,"" + ,"data "++bname++" = "++bname + ," deriving (Eq,Show,Typeable)" + ,"" + ,mpname++" :: CharMap" + ,mpname++" = "++mp + ,"" + ,"instance Encoding "++bname++" where" + ," encodeChar _ ch = mapEncode ch "++mpname + ," decodeChar _ = "++body2 + ," encodeable _ ch = mapMember ch "++mpname + ] + +decodingValueRange :: [(Word8,Word8)] -> DecodingElement -> (Int,Int) +decodingValueRange path (DecodingElement c ws) + = let v = foldl (\n (w,(lo,up)) -> n*((fromIntegral $ up-lo)+1) + (fromIntegral $ w - lo)) 0 (zip ws path) + in (v,v) +decodingValueRange path (DecodingRange first last bfirst blast bmin bmax) + = assert (zip bmin bmax == path) $ + (decodingValue path bfirst + ,decodingValue path blast) + +decodingValue :: [(Word8,Word8)] -> [Word8] -> Int +decodingValue path ws + = foldl (\n (w,(lo,up)) -> n*((fromIntegral $ up-lo) + 1) + (fromIntegral $ w - lo)) + 0 (zip ws path) + +type StateMachine = Map String [(Word8,Word8,String)] + +createDecoding :: StateMachine -> [DecodingElement] -> String +createDecoding sm els = create' els [] 0 "FIRST" + where + create' els path n st = let trans = sortBy (\(s1,e1,st1) (s2,e2,st2) -> compare s1 s2) $ sm Map.! st + in "(fetchWord8 >>= \\w"++show n++" -> " ++ tree' n path els trans 0 255++")" + + tree' :: Int -> [(Word8,Word8)] -> [DecodingElement] -> [(Word8,Word8,String)] -> Word8 -> Word8 -> String + tree' n path els [] _ _ = illWord $ "w"++show n + tree' n path els [(s,e,nst)] bl br + = let e1 = if s > bl + then "(if w"++show n++" < "++show s++" then "++illWord ("w"++show n)++" else "++e2++")" + else e2 + e2 = if e < br + then "(if w"++show n++" > "++show e++" then "++illWord ("w"++show n)++" else "++e3++")" + else e3 + e3 = if nst == "VALID" + then array' rpath sels + else "{- for "++nst++"-}" ++ create' nels npath (n+1) nst + npath = (s,e):path + rpath = reverse npath + sels = sortBy (comparing (decodingValueRange rpath)) nels + nels = filter (\el -> let (ll,lr) = (decodingLimits el)!!n in ll>=s && lr <= e) els + in e1 + tree' n path els trans bl br + = let (left,right@((b,_,_):_)) = splitAt (length trans `div` 2) trans + (eleft,eright) = partition (\el -> fst ((decodingLimits el)!!n) < b) els + in "(if w"++show n++" < "++show b++" then "++tree' n path eleft left bl (b-1) + ++" else "++tree' n path eright right b br++")" + + array' path els = let grps = groupBy (\e1 e2 -> case e1 of + DecodingRange _ _ _ _ _ _ -> False + _ -> case e2 of + DecodingRange _ _ _ _ _ _ -> False + _ -> True + ) els + ranges = map (\(l,u) -> (fromIntegral $ u-l)+1) path + val = foldl (\expr (r,n,m) -> "("++expr++"*"++show r++"+(fromIntegral w"++show n++"-"++show m++"))") + "0" + (zip3 ranges [0..] (map fst path)) + offset = (product ranges)-1 + in "(let val = " ++ val ++ " in "++array'' path grps 0 offset++")" + + array'' path [] _ _ = "throwException (IllegalRepresentation ["++concat (intersperse "," (zipWith (\n _ -> "w"++show n) [0..] path))++"])" + array'' path [grp] lo up + = case grp of + [DecodingRange first end bfirst bend bmin bmax] -> + let ranges = map (\(l,u) -> (fromIntegral $ u-l)+1) path + off = foldl (\v (r,c,m) -> v*r+(fromIntegral $ c-m)) 0 (zip3 ranges bfirst bmin) + equalranges = and $ zipWith (==) path (zip bmin bmax) + in if equalranges + then "(return (chr (val + ("++show (ord first - off)++"))))" + else error "Can't have a range that has a different range..." + _ -> let chars = fillRange lo $ map (\el@(DecodingElement c _) -> (c,fst $ decodingValueRange path el)) grp + in "(return (("++buildStaticArray (lo,up) chars++")!val))" + array'' path grps lo up = let (left,right@(brk:_)) = splitAt (length grps `div` 2) grps + (off,_) = decodingValueRange path (head brk) + in "(if val < "++show off++" then "++array'' path left lo (off-1) + ++" else "++array'' path right off up++")" + +fillRange :: Int -> [(Char,Int)] -> [Char] +fillRange s [] = [] +fillRange s all@((c,i):cs) = case compare i s of + GT -> '\0':fillRange (s+1) all + LT -> error $ "Char out of range "++show (take 10 all) + EQ -> c:fillRange (s+1) cs + +states :: CharacterMapping -> StateMachine +states (CharacterMapping attrs hist val ass) + = case val of + OneOf2 (Validity (NonEmpty lst)) -> Map.fromListWith (++) $ + map (\st -> let BS [start] = stateS st + end = case stateE st of + Nothing -> start + Just (BS [rend]) -> rend + in (stateType st,[(start,end,stateNext st)])) lst + _ -> error "Mapping doesn't contain validity section" + +decodingElements :: CharacterMapping -> [DecodingElement] +decodingElements mp = map (\(c,ws) -> DecodingElement c ws) (assignments mp) + ++ map (\(fi,la,bfi,bla,bmi,bma) -> DecodingRange fi la bfi bla bmi bma) (ranges mp) + +illWord :: String -> String +illWord n = "throwException (IllegalCharacter "++n++")" + +decodingLimits :: DecodingElement -> [(Word8,Word8)] +decodingLimits (DecodingElement _ ws) = map (\w -> (w,w)) ws +decodingLimits (DecodingRange _ _ bfirst blast bmin bmax) = lim' False (zip4 bfirst blast bmin bmax) + where + lim' dec [] = [] + lim' dec ((fi,la,mi,ma):xs) = if dec + then (mi,ma):(lim' dec xs) + else (fi,la):(lim' (fi/=la) xs) + +decodingLength :: DecodingElement -> Int +decodingLength (DecodingRange _ _ first _ _ _) = length first +decodingLength (DecodingElement _ ws) = length ws + +decodingElementCount :: DecodingElement -> Int +decodingElementCount (DecodingRange s e _ _ _ _) = ord e - ord s +decodingElementCount (DecodingElement _ _) = 1 + +data DecodingElement + = DecodingRange Char Char [Word8] [Word8] [Word8] [Word8] + | DecodingElement Char [Word8] + deriving Show + +norep :: String -> String +norep var = "(throwException $ HasNoRepresentation "++var++")" + +buildDecisionTree :: Char -> Char -> String -> [EncodingElement] -> String +buildDecisionTree l r var [] = norep var +buildDecisionTree l r var [el] + = let e1 = if l < startChar el + then "(if "++var++" < "++show (startChar el)++" then "++norep var++" else "++e2++")" + else e2 + e2 = if r > endChar el + then "(if "++var++" > "++show (endChar el)++" then "++norep var++" else "++e3++")" + else e3 + e3 = buildEncoding el var + in e1 +buildDecisionTree ll lr var els + = let (l,r@(sep:_)) = splitAt (length els `div` 2) els + in "(if "++var++" < "++show (startChar sep) + ++" then ("++(buildDecisionTree ll (pred $ startChar sep) var l)++")" + ++" else ("++(buildDecisionTree (endChar sep) lr var r)++")" + ++")" + +buildEncoding :: EncodingElement -> String -> String +buildEncoding (EncodingRange start end bf bl bmin bmax) var + = let ranges :: [Int] + ranges = map fromIntegral $ zipWith (-) bmax bmin + in "(let num = (ord "++var++") - ("++show (ord start - (foldl (\n (r,vf,vm) -> n*(r+1) + (fromIntegral (vf-vm))) 0 (zip3 ranges bf bmin)))++")" + ++concat ([ " ; (p"++show n++",r"++show n++") = " + ++(if n==1 then "num" else "p"++show (n-1)) + ++" `divMod` "++show (r+1) + | r <- reverse ranges | n <- [1..] ]) + ++" in " + ++concat (intersperse " >> " (reverse ["pushWord8 (fromIntegral (r"++show n++" + "++show w++"))" | n <- [1..] | w <- reverse bmin])) + ++")" +buildEncoding (EncodingGroup start end encs) var + = let findParams st [] = st + findParams st (x:xs) = findParams (case compare (length x) (fst st) of + LT -> (fst st,False) + GT -> (length x,False) + EQ -> st) xs + (mx,same) = findParams (length $ head encs,True) (tail encs) + in if same + then ("(let off = "++show mx++"*(ord "++var++" - "++show (ord start)++") ; arr = " + ++buildStaticArray (0,(length encs)*mx-1) (concat encs) + ++" in " + ++concat (intersperse " >> " ["pushWord8 (arr!(off+"++show (n-1)++"))" | n <- [1..mx]]) + ++")") + else ("(let off = "++show (mx+1)++"*((ord "++var++") - "++show (ord start)++") ; arr = " + ++buildStaticArray (0,(length encs)*(mx+1)-1) + (concat [(fromIntegral $ length e) + :(e++replicate (mx-length e) 0) | e <- encs]) + ++ "::StaticArray Int Word8" + ++" ; len = fromIntegral (arr!off)::Int ; bytes = map (\\n -> arr!(off+n)) [1..len]" + ++" in mapM_ pushWord8 bytes)") + +data EncodingElement + = EncodingRange Char Char [Word8] [Word8] [Word8] [Word8] + | EncodingGroup Char Char [[Word8]] + deriving Show + +mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] +mergeBy f [] ys = ys +mergeBy f xs [] = xs +mergeBy f (x:xs) (y:ys) + = case f x y of + LT -> x:mergeBy f xs (y:ys) + _ -> y:mergeBy f (x:xs) ys + +startChar :: EncodingElement -> Char +startChar (EncodingRange c _ _ _ _ _) = c +startChar (EncodingGroup c _ _) = c + +endChar :: EncodingElement -> Char +endChar (EncodingRange _ c _ _ _ _) = c +endChar (EncodingGroup _ c _) = c + + +encodingElements :: CharacterMapping -> [EncodingElement] +encodingElements mp = mergeBy (comparing startChar) + (buildGroups $ sortAssignments $ assignments mp) + (encodingRanges $ ranges mp) + +assignments :: CharacterMapping -> [(Char,[Word8])] +assignments (CharacterMapping _ _ _ (Assignments _ ass _ _ _ ranges)) + = map (\a -> let CP [cp] = aU a + BS bs = aB a + in (cp,bs) + ) ass + +encodingRanges :: [(Char,Char,[Word8],[Word8],[Word8],[Word8])] -> [EncodingElement] +encodingRanges lst = sortBy (comparing (\(EncodingRange c _ _ _ _ _) -> c)) $ + map (\(ufirst,ulast,bfirst,blast,bmin,bmax) -> EncodingRange ufirst ulast bfirst blast bmin bmax) lst + +ranges :: CharacterMapping -> [(Char,Char,[Word8],[Word8],[Word8],[Word8])] +ranges (CharacterMapping _ _ _ (Assignments _ ass _ _ _ ranges)) + = map (\r -> let BS bfirst = rangeBFirst r + BS blast = rangeBLast r + CP [ufirst] = rangeUFirst r + CP [ulast] = rangeULast r + BS bmin = rangeBMin r + BS bmax = rangeBMax r + in (ufirst,ulast,bfirst,blast,bmin,bmax) + ) ranges + + +sortAssignments :: [(Char,[Word8])] -> [(Char,[Word8])] +sortAssignments = sortBy (comparing fst) + +buildGroups :: [(Char,[Word8])] -> [EncodingElement] +buildGroups [] = [] +buildGroups ((c,bs):rest) = (EncodingGroup c end (bs:wrds)):buildGroups oth + where + (end,wrds,oth) = group c rest + + group n [] = (n,[],[]) + group n all@((c,bs):rest) + | succ n == c = let (e,res,oth) = group c rest + in (e,bs:res,oth) + | otherwise = (n,[],all) \ No newline at end of file diff --git a/Setup.hs b/Setup.hs index 2a1c720..75dba6f 100644 --- a/Setup.hs +++ b/Setup.hs @@ -1,5 +1,12 @@ module Main where -import Distribution.Simple (defaultMain) +import Distribution.Simple +import Data.Encoding.Preprocessor.Mapping +import Data.Encoding.Preprocessor.XMLMappingBuilder -main = defaultMain +main = defaultMainWithHooks (simpleUserHooks + {hookedPreProcessors = ("mapping",\_ _ -> mappingPreprocessor) + :("mapping2",\_ _ -> mappingPreprocessor) + :("xml",\_ _ -> xmlPreprocessor) + :(hookedPreProcessors simpleUserHooks) + })