encoding/Data/Encoding/Preprocessor/XMLMappingBuilder.hs
Henning Guenther 39af34b0a3 Added Preprocessor for XML mappings and normal mappings
Ignore-this: dc0902f526ceb99db528e14c9e3ad563

darcs-hash:20090813024109-a4fee-447c0ff194c227ed919d6eef0f7824e63276183e
2009-08-12 19:41:09 -07:00

315 lines
15 KiB
Haskell

{-# LANGUAGE ParallelListComp #-}
module Data.Encoding.Preprocessor.XMLMappingBuilder where
import Data.Word
import Data.List
import Data.Ord
import Data.Char
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Array.Static.Builder
import Data.CharMap.Builder
import Data.Encoding.Preprocessor.XMLMapping
import Distribution.Simple.PreProcess
import System.FilePath
import Text.XML.HaXml.OneOfN
import Text.XML.HaXml.XmlContent
import Control.Exception (assert)
xmlPreprocessor :: PreProcessor
xmlPreprocessor = PreProcessor
{ platformIndependent = True
, runPreProcessor = \src trg verb -> do
createModuleFromFile src trg
}
description :: CharacterMapping -> Maybe String
description (CharacterMapping attrs _ _ _) = characterMappingDescription attrs
createModuleFromFile (sbase,sfile) (tbase,tfile) = do
xml <- testFile (sbase </> sfile)
let (dir,fn) = splitFileName sfile
let (bname,ext) = splitExtensions fn
let dirs = splitDirectories dir
let body = buildDecisionTree minBound maxBound "ch" (encodingElements xml)
let body2 = createDecoding (states xml) (decodingElements xml)
let mpname = "encoding_map_"++bname
let mp = buildCharMap $ [SingleMapping c w | (c,w) <- assignments xml]
++[ RangeMapping
st end
(foldl (\v (w,mi,ma) -> v*((fromIntegral $ ma-mi)+1) + (fromIntegral (w-mi))) 0 (zip3 bfirst bmin bmax))
[(min,max-min+1) | min <- bmin | max <- bmax]
| (st,end,bfirst,blast,bmin,bmax) <- ranges xml ]
writeFile (tbase</>tfile) $ unlines $
["{-# LANGUAGE MagicHash,DeriveDataTypeable #-}"]++
(case description xml of
Nothing -> []
Just str -> ["{- | "++str++" -}"]) ++
["module "++concat (intersperse "." (dirs++[bname]))
," ("++bname++"("++bname++"))"
," where"
,""
,"import Control.Throws"
,"import Data.Encoding.Base"
,"import Data.Encoding.ByteSink"
,"import Data.Encoding.ByteSource"
,"import Data.Encoding.Exception"
,"import Data.Array.Static"
,"import Data.Map.Static"
,"import Data.CharMap"
,"import Data.Char"
,"import Data.Word"
,"import Data.Typeable"
,""
,"data "++bname++" = "++bname
," deriving (Eq,Show,Typeable)"
,""
,mpname++" :: CharMap"
,mpname++" = "++mp
,""
,"instance Encoding "++bname++" where"
," encodeChar _ ch = mapEncode ch "++mpname
," decodeChar _ = "++body2
," encodeable _ ch = mapMember ch "++mpname
]
decodingValueRange :: [(Word8,Word8)] -> DecodingElement -> (Int,Int)
decodingValueRange path (DecodingElement c ws)
= let v = foldl (\n (w,(lo,up)) -> n*((fromIntegral $ up-lo)+1) + (fromIntegral $ w - lo)) 0 (zip ws path)
in (v,v)
decodingValueRange path (DecodingRange first last bfirst blast bmin bmax)
= assert (zip bmin bmax == path) $
(decodingValue path bfirst
,decodingValue path blast)
decodingValue :: [(Word8,Word8)] -> [Word8] -> Int
decodingValue path ws
= foldl (\n (w,(lo,up)) -> n*((fromIntegral $ up-lo) + 1) + (fromIntegral $ w - lo))
0 (zip ws path)
type StateMachine = Map String [(Word8,Word8,String)]
createDecoding :: StateMachine -> [DecodingElement] -> String
createDecoding sm els = create' els [] 0 "FIRST"
where
create' els path n st = let trans = sortBy (\(s1,e1,st1) (s2,e2,st2) -> compare s1 s2) $ sm Map.! st
in "(fetchWord8 >>= \\w"++show n++" -> " ++ tree' n path els trans 0 255++")"
tree' :: Int -> [(Word8,Word8)] -> [DecodingElement] -> [(Word8,Word8,String)] -> Word8 -> Word8 -> String
tree' n path els [] _ _ = illWord $ "w"++show n
tree' n path els [(s,e,nst)] bl br
= let e1 = if s > bl
then "(if w"++show n++" < "++show s++" then "++illWord ("w"++show n)++" else "++e2++")"
else e2
e2 = if e < br
then "(if w"++show n++" > "++show e++" then "++illWord ("w"++show n)++" else "++e3++")"
else e3
e3 = if nst == "VALID"
then array' rpath sels
else "{- for "++nst++"-}" ++ create' nels npath (n+1) nst
npath = (s,e):path
rpath = reverse npath
sels = sortBy (comparing (decodingValueRange rpath)) nels
nels = filter (\el -> let (ll,lr) = (decodingLimits el)!!n in ll>=s && lr <= e) els
in e1
tree' n path els trans bl br
= let (left,right@((b,_,_):_)) = splitAt (length trans `div` 2) trans
(eleft,eright) = partition (\el -> fst ((decodingLimits el)!!n) < b) els
in "(if w"++show n++" < "++show b++" then "++tree' n path eleft left bl (b-1)
++" else "++tree' n path eright right b br++")"
array' path els = let grps = groupBy (\e1 e2 -> case e1 of
DecodingRange _ _ _ _ _ _ -> False
_ -> case e2 of
DecodingRange _ _ _ _ _ _ -> False
_ -> True
) els
ranges = map (\(l,u) -> (fromIntegral $ u-l)+1) path
val = foldl (\expr (r,n,m) -> "("++expr++"*"++show r++"+(fromIntegral w"++show n++"-"++show m++"))")
"0"
(zip3 ranges [0..] (map fst path))
offset = (product ranges)-1
in "(let val = " ++ val ++ " in "++array'' path grps 0 offset++")"
array'' path [] _ _ = "throwException (IllegalRepresentation ["++concat (intersperse "," (zipWith (\n _ -> "w"++show n) [0..] path))++"])"
array'' path [grp] lo up
= case grp of
[DecodingRange first end bfirst bend bmin bmax] ->
let ranges = map (\(l,u) -> (fromIntegral $ u-l)+1) path
off = foldl (\v (r,c,m) -> v*r+(fromIntegral $ c-m)) 0 (zip3 ranges bfirst bmin)
equalranges = and $ zipWith (==) path (zip bmin bmax)
in if equalranges
then "(return (chr (val + ("++show (ord first - off)++"))))"
else error "Can't have a range that has a different range..."
_ -> let chars = fillRange lo $ map (\el@(DecodingElement c _) -> (c,fst $ decodingValueRange path el)) grp
in "(return (("++buildStaticArray (lo,up) chars++")!val))"
array'' path grps lo up = let (left,right@(brk:_)) = splitAt (length grps `div` 2) grps
(off,_) = decodingValueRange path (head brk)
in "(if val < "++show off++" then "++array'' path left lo (off-1)
++" else "++array'' path right off up++")"
fillRange :: Int -> [(Char,Int)] -> [Char]
fillRange s [] = []
fillRange s all@((c,i):cs) = case compare i s of
GT -> '\0':fillRange (s+1) all
LT -> error $ "Char out of range "++show (take 10 all)
EQ -> c:fillRange (s+1) cs
states :: CharacterMapping -> StateMachine
states (CharacterMapping attrs hist val ass)
= case val of
OneOf2 (Validity (NonEmpty lst)) -> Map.fromListWith (++) $
map (\st -> let BS [start] = stateS st
end = case stateE st of
Nothing -> start
Just (BS [rend]) -> rend
in (stateType st,[(start,end,stateNext st)])) lst
_ -> error "Mapping doesn't contain validity section"
decodingElements :: CharacterMapping -> [DecodingElement]
decodingElements mp = map (\(c,ws) -> DecodingElement c ws) (assignments mp)
++ map (\(fi,la,bfi,bla,bmi,bma) -> DecodingRange fi la bfi bla bmi bma) (ranges mp)
illWord :: String -> String
illWord n = "throwException (IllegalCharacter "++n++")"
decodingLimits :: DecodingElement -> [(Word8,Word8)]
decodingLimits (DecodingElement _ ws) = map (\w -> (w,w)) ws
decodingLimits (DecodingRange _ _ bfirst blast bmin bmax) = lim' False (zip4 bfirst blast bmin bmax)
where
lim' dec [] = []
lim' dec ((fi,la,mi,ma):xs) = if dec
then (mi,ma):(lim' dec xs)
else (fi,la):(lim' (fi/=la) xs)
decodingLength :: DecodingElement -> Int
decodingLength (DecodingRange _ _ first _ _ _) = length first
decodingLength (DecodingElement _ ws) = length ws
decodingElementCount :: DecodingElement -> Int
decodingElementCount (DecodingRange s e _ _ _ _) = ord e - ord s
decodingElementCount (DecodingElement _ _) = 1
data DecodingElement
= DecodingRange Char Char [Word8] [Word8] [Word8] [Word8]
| DecodingElement Char [Word8]
deriving Show
norep :: String -> String
norep var = "(throwException $ HasNoRepresentation "++var++")"
buildDecisionTree :: Char -> Char -> String -> [EncodingElement] -> String
buildDecisionTree l r var [] = norep var
buildDecisionTree l r var [el]
= let e1 = if l < startChar el
then "(if "++var++" < "++show (startChar el)++" then "++norep var++" else "++e2++")"
else e2
e2 = if r > endChar el
then "(if "++var++" > "++show (endChar el)++" then "++norep var++" else "++e3++")"
else e3
e3 = buildEncoding el var
in e1
buildDecisionTree ll lr var els
= let (l,r@(sep:_)) = splitAt (length els `div` 2) els
in "(if "++var++" < "++show (startChar sep)
++" then ("++(buildDecisionTree ll (pred $ startChar sep) var l)++")"
++" else ("++(buildDecisionTree (endChar sep) lr var r)++")"
++")"
buildEncoding :: EncodingElement -> String -> String
buildEncoding (EncodingRange start end bf bl bmin bmax) var
= let ranges :: [Int]
ranges = map fromIntegral $ zipWith (-) bmax bmin
in "(let num = (ord "++var++") - ("++show (ord start - (foldl (\n (r,vf,vm) -> n*(r+1) + (fromIntegral (vf-vm))) 0 (zip3 ranges bf bmin)))++")"
++concat ([ " ; (p"++show n++",r"++show n++") = "
++(if n==1 then "num" else "p"++show (n-1))
++" `divMod` "++show (r+1)
| r <- reverse ranges | n <- [1..] ])
++" in "
++concat (intersperse " >> " (reverse ["pushWord8 (fromIntegral (r"++show n++" + "++show w++"))" | n <- [1..] | w <- reverse bmin]))
++")"
buildEncoding (EncodingGroup start end encs) var
= let findParams st [] = st
findParams st (x:xs) = findParams (case compare (length x) (fst st) of
LT -> (fst st,False)
GT -> (length x,False)
EQ -> st) xs
(mx,same) = findParams (length $ head encs,True) (tail encs)
in if same
then ("(let off = "++show mx++"*(ord "++var++" - "++show (ord start)++") ; arr = "
++buildStaticArray (0,(length encs)*mx-1) (concat encs)
++" in "
++concat (intersperse " >> " ["pushWord8 (arr!(off+"++show (n-1)++"))" | n <- [1..mx]])
++")")
else ("(let off = "++show (mx+1)++"*((ord "++var++") - "++show (ord start)++") ; arr = "
++buildStaticArray (0,(length encs)*(mx+1)-1)
(concat [(fromIntegral $ length e)
:(e++replicate (mx-length e) 0) | e <- encs])
++ "::StaticArray Int Word8"
++" ; len = fromIntegral (arr!off)::Int ; bytes = map (\\n -> arr!(off+n)) [1..len]"
++" in mapM_ pushWord8 bytes)")
data EncodingElement
= EncodingRange Char Char [Word8] [Word8] [Word8] [Word8]
| EncodingGroup Char Char [[Word8]]
deriving Show
mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
mergeBy f [] ys = ys
mergeBy f xs [] = xs
mergeBy f (x:xs) (y:ys)
= case f x y of
LT -> x:mergeBy f xs (y:ys)
_ -> y:mergeBy f (x:xs) ys
startChar :: EncodingElement -> Char
startChar (EncodingRange c _ _ _ _ _) = c
startChar (EncodingGroup c _ _) = c
endChar :: EncodingElement -> Char
endChar (EncodingRange _ c _ _ _ _) = c
endChar (EncodingGroup _ c _) = c
encodingElements :: CharacterMapping -> [EncodingElement]
encodingElements mp = mergeBy (comparing startChar)
(buildGroups $ sortAssignments $ assignments mp)
(encodingRanges $ ranges mp)
assignments :: CharacterMapping -> [(Char,[Word8])]
assignments (CharacterMapping _ _ _ (Assignments _ ass _ _ _ ranges))
= map (\a -> let CP [cp] = aU a
BS bs = aB a
in (cp,bs)
) ass
encodingRanges :: [(Char,Char,[Word8],[Word8],[Word8],[Word8])] -> [EncodingElement]
encodingRanges lst = sortBy (comparing (\(EncodingRange c _ _ _ _ _) -> c)) $
map (\(ufirst,ulast,bfirst,blast,bmin,bmax) -> EncodingRange ufirst ulast bfirst blast bmin bmax) lst
ranges :: CharacterMapping -> [(Char,Char,[Word8],[Word8],[Word8],[Word8])]
ranges (CharacterMapping _ _ _ (Assignments _ ass _ _ _ ranges))
= map (\r -> let BS bfirst = rangeBFirst r
BS blast = rangeBLast r
CP [ufirst] = rangeUFirst r
CP [ulast] = rangeULast r
BS bmin = rangeBMin r
BS bmax = rangeBMax r
in (ufirst,ulast,bfirst,blast,bmin,bmax)
) ranges
sortAssignments :: [(Char,[Word8])] -> [(Char,[Word8])]
sortAssignments = sortBy (comparing fst)
buildGroups :: [(Char,[Word8])] -> [EncodingElement]
buildGroups [] = []
buildGroups ((c,bs):rest) = (EncodingGroup c end (bs:wrds)):buildGroups oth
where
(end,wrds,oth) = group c rest
group n [] = (n,[],[])
group n all@((c,bs):rest)
| succ n == c = let (e,res,oth) = group c rest
in (e,bs:res,oth)
| otherwise = (n,[],all)