Complete zipStream, untested
This commit is contained in:
parent
c460089d1a
commit
ad6413d9b7
@ -2,7 +2,8 @@ module Codec.Archive.Zip.Conduit.Internal
|
||||
( zipError
|
||||
, idConduit
|
||||
, sizeCRC
|
||||
, zip64Size
|
||||
, sizeC
|
||||
, maxBound32
|
||||
, deflateWindowBits
|
||||
) where
|
||||
|
||||
@ -31,8 +32,11 @@ passthroughFold f z = C.await >>= maybe
|
||||
sizeCRC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m (Word64, Word32)
|
||||
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
|
||||
|
||||
zip64Size :: Integral n => n
|
||||
zip64Size = 0xffffffff
|
||||
sizeC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m Word64
|
||||
sizeC = passthroughFold (\l b -> l + fromIntegral (BS.length b)) 0 -- fst <$> sizeCRC
|
||||
|
||||
maxBound32 :: Integral n => n
|
||||
maxBound32 = fromIntegral (maxBound :: Word32)
|
||||
|
||||
deflateWindowBits :: WindowBits
|
||||
deflateWindowBits = WindowBits (-15)
|
||||
|
||||
@ -20,7 +20,7 @@ import qualified Data.ByteString.Char8 as BSC
|
||||
import qualified Data.Conduit as C
|
||||
import qualified Data.Conduit.List as CL
|
||||
import Data.Conduit.Serialization.Binary (sinkGet)
|
||||
import Data.Conduit.Zlib (decompress)
|
||||
import qualified Data.Conduit.Zlib as CZ
|
||||
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
|
||||
import Data.Word (Word16, Word32, Word64)
|
||||
|
||||
@ -97,15 +97,19 @@ fromDOSTime time date = LocalTime
|
||||
-- Any errors are thrown in the underlying monad.
|
||||
unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
|
||||
unZipStream = next where
|
||||
next = do
|
||||
h <- sinkGet header
|
||||
next = do -- local header, or start central directory
|
||||
h <- sinkGet $ do
|
||||
sig <- G.getWord32le
|
||||
case sig of
|
||||
0x04034b50 -> fileHeader
|
||||
_ -> centralBody sig
|
||||
case h of
|
||||
FileHeader{..} -> do
|
||||
C.yield $ Left fileEntry
|
||||
r <- C.mapOutput Right $
|
||||
case zipEntrySize fileEntry of
|
||||
Nothing -> do -- unknown size
|
||||
((csize, _), (size, crc)) <- C.fuseBoth sizeCRC
|
||||
(csize, (size, crc)) <- C.fuseBoth sizeC
|
||||
$ fileDecompress
|
||||
C..| sizeCRC
|
||||
-- required data description
|
||||
@ -128,11 +132,6 @@ unZipStream = next where
|
||||
next
|
||||
EndOfCentralDirectory{..} -> do
|
||||
return endInfo
|
||||
header = do
|
||||
sig <- G.getWord32le
|
||||
case sig of
|
||||
0x04034b50 -> fileHeader
|
||||
_ -> centralBody sig
|
||||
dataDesc h = -- this takes a bit of flexibility to account for the various cases
|
||||
(do -- with signature
|
||||
sig <- G.getWord32le
|
||||
@ -162,7 +161,7 @@ unZipStream = next where
|
||||
dcomp <- case comp of
|
||||
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
|
||||
| otherwise -> return idConduit
|
||||
8 -> return $ decompress deflateWindowBits
|
||||
8 -> return $ CZ.decompress deflateWindowBits
|
||||
_ -> fail $ "Unsupported compression method: " ++ show comp
|
||||
time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le
|
||||
crc <- G.getWord32le
|
||||
@ -177,8 +176,8 @@ unZipStream = next where
|
||||
G.isolate z $ case t of
|
||||
0x0001 -> do
|
||||
-- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF"
|
||||
usiz' <- if usiz == zip64Size then G.getWord64le else return $ extZip64USize ext
|
||||
csiz' <- if csiz == zip64Size then G.getWord64le else return $ extZip64CSize ext
|
||||
usiz' <- if usiz == maxBound32 then G.getWord64le else return $ extZip64USize ext
|
||||
csiz' <- if csiz == maxBound32 then G.getWord64le else return $ extZip64CSize ext
|
||||
return ext
|
||||
{ extZip64 = True
|
||||
, extZip64USize = usiz'
|
||||
|
||||
@ -15,6 +15,7 @@ import Control.Monad (when)
|
||||
import Control.Monad.Base (MonadBase)
|
||||
import Control.Monad.Catch (MonadThrow)
|
||||
import Control.Monad.Primitive (PrimMonad)
|
||||
import Control.Monad.State.Strict (StateT, get)
|
||||
import Control.Monad.Trans.Resource (MonadResource)
|
||||
import qualified Data.Binary.Put as P
|
||||
import Data.Bits (bit, shiftL, shiftR, (.|.))
|
||||
@ -23,11 +24,13 @@ import qualified Data.ByteString.Char8 as BSC
|
||||
import qualified Data.ByteString.Lazy as BSL
|
||||
import qualified Data.Conduit as C
|
||||
import qualified Data.Conduit.Binary as CB
|
||||
import Data.Conduit.Lift (stateC, execStateC)
|
||||
import Data.Conduit.Serialization.Binary (sourcePut)
|
||||
import Data.Conduit.Zlib (compress)
|
||||
import qualified Data.Conduit.Zlib as CZ
|
||||
import Data.Digest.CRC32 (crc32)
|
||||
import Data.Either (isLeft)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Maybe (fromMaybe, fromJust)
|
||||
import Data.Monoid ((<>))
|
||||
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
|
||||
import Data.Word (Word16, Word64)
|
||||
|
||||
@ -35,7 +38,7 @@ import Codec.Archive.Zip.Conduit.Types
|
||||
import Codec.Archive.Zip.Conduit.Internal
|
||||
|
||||
data ZipOptions = ZipOptions
|
||||
{ zipOpt64 :: Bool -- ^Allow zip file sizes over 4GB (reduces compatibility, but is otherwise safe for any file sizes)
|
||||
{ zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases)
|
||||
, zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package)
|
||||
, zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file
|
||||
}
|
||||
@ -49,6 +52,11 @@ defaultZipOptions = ZipOptions
|
||||
}
|
||||
}
|
||||
|
||||
infixr 7 ?*
|
||||
(?*) :: Num a => Bool -> a -> a
|
||||
True ?* x = x
|
||||
False ?* _ = 0
|
||||
|
||||
data ZipData m
|
||||
= ZipDataByteString BSL.ByteString
|
||||
| ZipDataSource (C.Source m BS.ByteString)
|
||||
@ -79,54 +87,136 @@ toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins se
|
||||
, fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day
|
||||
)
|
||||
|
||||
countBytes :: Monad m => C.ConduitM i BS.ByteString m a -> C.ConduitM i BS.ByteString (StateT Word64 m) a
|
||||
countBytes c = stateC $ \s -> c `C.fuseBoth` ((s +) <$> sizeC)
|
||||
|
||||
output :: MonadThrow m => P.Put -> C.ConduitM i BS.ByteString (StateT Word64 m) ()
|
||||
output = countBytes . sourcePut
|
||||
|
||||
-- |The version of this zip program, really just rough indicator of compatibility
|
||||
zipVersion :: Word16
|
||||
zipVersion = 48
|
||||
|
||||
maxBound16 :: Integral n => n
|
||||
maxBound16 = fromIntegral (maxBound :: Word16)
|
||||
|
||||
zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64
|
||||
zipStream ZipOptions{..} = do
|
||||
C.awaitForever $ C.toProducer . entry
|
||||
return 0 -- TODO: size
|
||||
zipStream ZipOptions{..} = execStateC 0 $ do
|
||||
(cnt, cdir) <- next 0 (mempty :: P.Put)
|
||||
cdoff <- get
|
||||
output cdir
|
||||
eoff <- get
|
||||
endDirectory cdoff (eoff - cdoff) cnt
|
||||
where
|
||||
next cnt dir = C.await >>= maybe
|
||||
(return (cnt, dir))
|
||||
(\e -> do
|
||||
d <- entry e
|
||||
next (succ cnt) $ dir <> d)
|
||||
entry (ZipEntry{..}, zipData -> dat) = do
|
||||
let usiz = dataSize dat
|
||||
sdat = left (C..| sizeCRC) dat
|
||||
sdat = left ((C..| sizeCRC) . C.toProducer) dat
|
||||
comp = zipOptCompressLevel /= 0 && all (0 /=) usiz
|
||||
(cdat, csiz)
|
||||
| comp =
|
||||
( ((`C.fuseBoth` (compress zipOptCompressLevel deflateWindowBits C..| (fst <$> sizeCRC)))
|
||||
( ((`C.fuseBoth` (CZ.compress zipOptCompressLevel deflateWindowBits C..| sizeC))
|
||||
+++ Z.compress) sdat -- level for Z.compress?
|
||||
, dataSize cdat)
|
||||
| otherwise = (left (fmap (id &&& fst)) sdat, usiz)
|
||||
z64 = maybe zipOpt64 (zip64Size <) (max <$> usiz <*> csiz)
|
||||
z64 = maybe zipOpt64 (maxBound32 <) (max <$> usiz <*> csiz)
|
||||
namelen = BS.length zipEntryName
|
||||
when (namelen > fromIntegral (maxBound :: Word16)) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
|
||||
sourcePut $ do
|
||||
(time, date) = toDOSTime zipEntryTime
|
||||
mcrc = either (const Nothing) (Just . crc32) cdat
|
||||
when (namelen > maxBound16) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
|
||||
let common = do
|
||||
P.putWord16le $ if z64 then 45 else 20
|
||||
P.putWord16le $ isLeft dat ?* bit 3
|
||||
P.putWord16le $ comp ?* 8
|
||||
P.putWord16le $ time
|
||||
P.putWord16le $ date
|
||||
off <- get
|
||||
output $ do
|
||||
P.putWord32le 0x04034b50
|
||||
P.putWord16le $ if z64 then 45 else 20
|
||||
P.putWord16le $ if isLeft dat then bit 3 else 0
|
||||
P.putWord16le $ if comp then 8 else 0
|
||||
let (time, date) = toDOSTime zipEntryTime
|
||||
P.putWord16le $ time
|
||||
P.putWord16le $ date
|
||||
P.putWord32le $ either (const 0) crc32 cdat
|
||||
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral csiz
|
||||
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral usiz
|
||||
common
|
||||
P.putWord32le $ fromMaybe 0 mcrc
|
||||
P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral csiz
|
||||
P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz
|
||||
P.putWord16le $ fromIntegral namelen
|
||||
P.putWord16le $ if z64 then 20 else 0
|
||||
P.putWord16le $ z64 ?* 20
|
||||
P.putByteString zipEntryName
|
||||
when z64 $ do
|
||||
P.putWord16le 0x0001
|
||||
P.putWord16le 16
|
||||
P.putWord64le $ fromMaybe 0 usiz
|
||||
P.putWord64le $ fromMaybe 0 csiz
|
||||
either
|
||||
let outsz c = stateC $ \o -> (id &&& (o +) . snd) <$> c
|
||||
((usz, crc), csz) <- either
|
||||
(\cd -> do
|
||||
((usz, crc), csz) <- cd -- write compressed data
|
||||
when (not z64 && (usz > zip64Size || csz > zip64Size)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
|
||||
sourcePut $ do
|
||||
r@((usz, crc), csz) <- outsz cd -- write compressed data
|
||||
when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
|
||||
output $ do
|
||||
P.putWord32le 0x08074b50
|
||||
P.putWord32le crc
|
||||
let putsz
|
||||
| z64 = P.putWord64le
|
||||
| otherwise = P.putWord32le . fromIntegral
|
||||
putsz csz
|
||||
putsz usz)
|
||||
CB.sourceLbs
|
||||
putsz usz
|
||||
return r)
|
||||
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b)
|
||||
cdat
|
||||
return $ do
|
||||
let o64 = off >= maxBound32
|
||||
l64 = z64 ?* 16 + o64 ?* 8
|
||||
P.putWord32le 0x02014b50
|
||||
P.putWord16le zipVersion
|
||||
common
|
||||
P.putWord32le crc
|
||||
P.putWord32le $ if z64 then maxBound32 else fromIntegral csz
|
||||
P.putWord32le $ if z64 then maxBound32 else fromIntegral usz
|
||||
P.putWord16le $ fromIntegral namelen
|
||||
P.putWord16le $ 4 + l64
|
||||
P.putWord16le 0 -- comment length
|
||||
P.putWord16le 0 -- disk number
|
||||
P.putWord16le 0 -- internal file attributes
|
||||
P.putWord32le 0 -- external file attributes
|
||||
P.putWord32le $ if o64 then maxBound32 else fromIntegral off
|
||||
P.putByteString zipEntryName
|
||||
when (z64 || o64) $ do
|
||||
P.putWord16le 0x0001
|
||||
P.putWord16le l64
|
||||
when z64 $ do
|
||||
P.putWord64le usz
|
||||
P.putWord64le csz
|
||||
when o64 $
|
||||
P.putWord64le off
|
||||
endDirectory cdoff cdlen cnt = do
|
||||
let z64 = zipOpt64 || cdoff > maxBound32 || cnt > maxBound16
|
||||
when z64 $ output $ do
|
||||
P.putWord32le 0x06064b50 -- zip64 end
|
||||
P.putWord64le 44 -- length of this record
|
||||
P.putWord16le zipVersion
|
||||
P.putWord16le 45
|
||||
P.putWord32le 0 -- disk
|
||||
P.putWord32le 0 -- central disk
|
||||
P.putWord64le cnt
|
||||
P.putWord64le cnt
|
||||
P.putWord64le cdlen
|
||||
P.putWord64le cdoff
|
||||
P.putWord32le 0x07064b50 -- locator:
|
||||
P.putWord32le 0 -- central disk
|
||||
P.putWord64le $ cdoff + cdlen
|
||||
P.putWord32le 1 -- total disks
|
||||
let comment = zipComment zipOptInfo
|
||||
commlen = BS.length comment
|
||||
when (commlen > maxBound16) $ zipError "comment too long"
|
||||
output $ do
|
||||
P.putWord32le 0x06054b50 -- end
|
||||
P.putWord16le 0 -- disk
|
||||
P.putWord16le 0 -- central disk
|
||||
P.putWord16le $ fromIntegral $ min maxBound16 cnt
|
||||
P.putWord16le $ fromIntegral $ min maxBound16 cnt
|
||||
P.putWord32le $ fromIntegral $ min maxBound32 cdlen
|
||||
P.putWord32le $ fromIntegral $ max maxBound32 cdoff
|
||||
P.putWord16le $ fromIntegral commlen
|
||||
P.putByteString comment
|
||||
|
||||
@ -33,6 +33,7 @@ library
|
||||
conduit-extra,
|
||||
digest,
|
||||
exceptions,
|
||||
mtl,
|
||||
primitive,
|
||||
resourcet,
|
||||
time,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user