Complete zipStream, untested

This commit is contained in:
Dylan Simon 2017-05-13 00:41:41 -04:00
parent c460089d1a
commit ad6413d9b7
4 changed files with 136 additions and 42 deletions

View File

@ -2,7 +2,8 @@ module Codec.Archive.Zip.Conduit.Internal
( zipError
, idConduit
, sizeCRC
, zip64Size
, sizeC
, maxBound32
, deflateWindowBits
) where
@ -31,8 +32,11 @@ passthroughFold f z = C.await >>= maybe
sizeCRC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m (Word64, Word32)
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
zip64Size :: Integral n => n
zip64Size = 0xffffffff
sizeC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m Word64
sizeC = passthroughFold (\l b -> l + fromIntegral (BS.length b)) 0 -- fst <$> sizeCRC
maxBound32 :: Integral n => n
maxBound32 = fromIntegral (maxBound :: Word32)
deflateWindowBits :: WindowBits
deflateWindowBits = WindowBits (-15)

View File

@ -20,7 +20,7 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet)
import Data.Conduit.Zlib (decompress)
import qualified Data.Conduit.Zlib as CZ
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
import Data.Word (Word16, Word32, Word64)
@ -97,15 +97,19 @@ fromDOSTime time date = LocalTime
-- Any errors are thrown in the underlying monad.
unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
unZipStream = next where
next = do
h <- sinkGet header
next = do -- local header, or start central directory
h <- sinkGet $ do
sig <- G.getWord32le
case sig of
0x04034b50 -> fileHeader
_ -> centralBody sig
case h of
FileHeader{..} -> do
C.yield $ Left fileEntry
r <- C.mapOutput Right $
case zipEntrySize fileEntry of
Nothing -> do -- unknown size
((csize, _), (size, crc)) <- C.fuseBoth sizeCRC
(csize, (size, crc)) <- C.fuseBoth sizeC
$ fileDecompress
C..| sizeCRC
-- required data description
@ -128,11 +132,6 @@ unZipStream = next where
next
EndOfCentralDirectory{..} -> do
return endInfo
header = do
sig <- G.getWord32le
case sig of
0x04034b50 -> fileHeader
_ -> centralBody sig
dataDesc h = -- this takes a bit of flexibility to account for the various cases
(do -- with signature
sig <- G.getWord32le
@ -162,7 +161,7 @@ unZipStream = next where
dcomp <- case comp of
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
| otherwise -> return idConduit
8 -> return $ decompress deflateWindowBits
8 -> return $ CZ.decompress deflateWindowBits
_ -> fail $ "Unsupported compression method: " ++ show comp
time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le
crc <- G.getWord32le
@ -177,8 +176,8 @@ unZipStream = next where
G.isolate z $ case t of
0x0001 -> do
-- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF"
usiz' <- if usiz == zip64Size then G.getWord64le else return $ extZip64USize ext
csiz' <- if csiz == zip64Size then G.getWord64le else return $ extZip64CSize ext
usiz' <- if usiz == maxBound32 then G.getWord64le else return $ extZip64USize ext
csiz' <- if csiz == maxBound32 then G.getWord64le else return $ extZip64CSize ext
return ext
{ extZip64 = True
, extZip64USize = usiz'

View File

@ -15,6 +15,7 @@ import Control.Monad (when)
import Control.Monad.Base (MonadBase)
import Control.Monad.Catch (MonadThrow)
import Control.Monad.Primitive (PrimMonad)
import Control.Monad.State.Strict (StateT, get)
import Control.Monad.Trans.Resource (MonadResource)
import qualified Data.Binary.Put as P
import Data.Bits (bit, shiftL, shiftR, (.|.))
@ -23,11 +24,13 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Lift (stateC, execStateC)
import Data.Conduit.Serialization.Binary (sourcePut)
import Data.Conduit.Zlib (compress)
import qualified Data.Conduit.Zlib as CZ
import Data.Digest.CRC32 (crc32)
import Data.Either (isLeft)
import Data.Maybe (fromMaybe)
import Data.Maybe (fromMaybe, fromJust)
import Data.Monoid ((<>))
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
import Data.Word (Word16, Word64)
@ -35,7 +38,7 @@ import Codec.Archive.Zip.Conduit.Types
import Codec.Archive.Zip.Conduit.Internal
data ZipOptions = ZipOptions
{ zipOpt64 :: Bool -- ^Allow zip file sizes over 4GB (reduces compatibility, but is otherwise safe for any file sizes)
{ zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases)
, zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package)
, zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file
}
@ -49,6 +52,11 @@ defaultZipOptions = ZipOptions
}
}
infixr 7 ?*
(?*) :: Num a => Bool -> a -> a
True ?* x = x
False ?* _ = 0
data ZipData m
= ZipDataByteString BSL.ByteString
| ZipDataSource (C.Source m BS.ByteString)
@ -79,54 +87,136 @@ toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins se
, fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day
)
countBytes :: Monad m => C.ConduitM i BS.ByteString m a -> C.ConduitM i BS.ByteString (StateT Word64 m) a
countBytes c = stateC $ \s -> c `C.fuseBoth` ((s +) <$> sizeC)
output :: MonadThrow m => P.Put -> C.ConduitM i BS.ByteString (StateT Word64 m) ()
output = countBytes . sourcePut
-- |The version of this zip program, really just rough indicator of compatibility
zipVersion :: Word16
zipVersion = 48
maxBound16 :: Integral n => n
maxBound16 = fromIntegral (maxBound :: Word16)
zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64
zipStream ZipOptions{..} = do
C.awaitForever $ C.toProducer . entry
return 0 -- TODO: size
zipStream ZipOptions{..} = execStateC 0 $ do
(cnt, cdir) <- next 0 (mempty :: P.Put)
cdoff <- get
output cdir
eoff <- get
endDirectory cdoff (eoff - cdoff) cnt
where
next cnt dir = C.await >>= maybe
(return (cnt, dir))
(\e -> do
d <- entry e
next (succ cnt) $ dir <> d)
entry (ZipEntry{..}, zipData -> dat) = do
let usiz = dataSize dat
sdat = left (C..| sizeCRC) dat
sdat = left ((C..| sizeCRC) . C.toProducer) dat
comp = zipOptCompressLevel /= 0 && all (0 /=) usiz
(cdat, csiz)
| comp =
( ((`C.fuseBoth` (compress zipOptCompressLevel deflateWindowBits C..| (fst <$> sizeCRC)))
( ((`C.fuseBoth` (CZ.compress zipOptCompressLevel deflateWindowBits C..| sizeC))
+++ Z.compress) sdat -- level for Z.compress?
, dataSize cdat)
| otherwise = (left (fmap (id &&& fst)) sdat, usiz)
z64 = maybe zipOpt64 (zip64Size <) (max <$> usiz <*> csiz)
z64 = maybe zipOpt64 (maxBound32 <) (max <$> usiz <*> csiz)
namelen = BS.length zipEntryName
when (namelen > fromIntegral (maxBound :: Word16)) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
sourcePut $ do
(time, date) = toDOSTime zipEntryTime
mcrc = either (const Nothing) (Just . crc32) cdat
when (namelen > maxBound16) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
let common = do
P.putWord16le $ if z64 then 45 else 20
P.putWord16le $ isLeft dat ?* bit 3
P.putWord16le $ comp ?* 8
P.putWord16le $ time
P.putWord16le $ date
off <- get
output $ do
P.putWord32le 0x04034b50
P.putWord16le $ if z64 then 45 else 20
P.putWord16le $ if isLeft dat then bit 3 else 0
P.putWord16le $ if comp then 8 else 0
let (time, date) = toDOSTime zipEntryTime
P.putWord16le $ time
P.putWord16le $ date
P.putWord32le $ either (const 0) crc32 cdat
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral csiz
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral usiz
common
P.putWord32le $ fromMaybe 0 mcrc
P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral csiz
P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz
P.putWord16le $ fromIntegral namelen
P.putWord16le $ if z64 then 20 else 0
P.putWord16le $ z64 ?* 20
P.putByteString zipEntryName
when z64 $ do
P.putWord16le 0x0001
P.putWord16le 16
P.putWord64le $ fromMaybe 0 usiz
P.putWord64le $ fromMaybe 0 csiz
either
let outsz c = stateC $ \o -> (id &&& (o +) . snd) <$> c
((usz, crc), csz) <- either
(\cd -> do
((usz, crc), csz) <- cd -- write compressed data
when (not z64 && (usz > zip64Size || csz > zip64Size)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
sourcePut $ do
r@((usz, crc), csz) <- outsz cd -- write compressed data
when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
output $ do
P.putWord32le 0x08074b50
P.putWord32le crc
let putsz
| z64 = P.putWord64le
| otherwise = P.putWord32le . fromIntegral
putsz csz
putsz usz)
CB.sourceLbs
putsz usz
return r)
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b)
cdat
return $ do
let o64 = off >= maxBound32
l64 = z64 ?* 16 + o64 ?* 8
P.putWord32le 0x02014b50
P.putWord16le zipVersion
common
P.putWord32le crc
P.putWord32le $ if z64 then maxBound32 else fromIntegral csz
P.putWord32le $ if z64 then maxBound32 else fromIntegral usz
P.putWord16le $ fromIntegral namelen
P.putWord16le $ 4 + l64
P.putWord16le 0 -- comment length
P.putWord16le 0 -- disk number
P.putWord16le 0 -- internal file attributes
P.putWord32le 0 -- external file attributes
P.putWord32le $ if o64 then maxBound32 else fromIntegral off
P.putByteString zipEntryName
when (z64 || o64) $ do
P.putWord16le 0x0001
P.putWord16le l64
when z64 $ do
P.putWord64le usz
P.putWord64le csz
when o64 $
P.putWord64le off
endDirectory cdoff cdlen cnt = do
let z64 = zipOpt64 || cdoff > maxBound32 || cnt > maxBound16
when z64 $ output $ do
P.putWord32le 0x06064b50 -- zip64 end
P.putWord64le 44 -- length of this record
P.putWord16le zipVersion
P.putWord16le 45
P.putWord32le 0 -- disk
P.putWord32le 0 -- central disk
P.putWord64le cnt
P.putWord64le cnt
P.putWord64le cdlen
P.putWord64le cdoff
P.putWord32le 0x07064b50 -- locator:
P.putWord32le 0 -- central disk
P.putWord64le $ cdoff + cdlen
P.putWord32le 1 -- total disks
let comment = zipComment zipOptInfo
commlen = BS.length comment
when (commlen > maxBound16) $ zipError "comment too long"
output $ do
P.putWord32le 0x06054b50 -- end
P.putWord16le 0 -- disk
P.putWord16le 0 -- central disk
P.putWord16le $ fromIntegral $ min maxBound16 cnt
P.putWord16le $ fromIntegral $ min maxBound16 cnt
P.putWord32le $ fromIntegral $ min maxBound32 cdlen
P.putWord32le $ fromIntegral $ max maxBound32 cdoff
P.putWord16le $ fromIntegral commlen
P.putByteString comment

View File

@ -33,6 +33,7 @@ library
conduit-extra,
digest,
exceptions,
mtl,
primitive,
resourcet,
time,