From ad6413d9b7f48c8df09ba6b9d58dfaf24c13996f Mon Sep 17 00:00:00 2001 From: Dylan Simon Date: Sat, 13 May 2017 00:41:41 -0400 Subject: [PATCH] Complete zipStream, untested --- Codec/Archive/Zip/Conduit/Internal.hs | 10 +- Codec/Archive/Zip/Conduit/UnZip.hs | 23 ++-- Codec/Archive/Zip/Conduit/Zip.hs | 144 +++++++++++++++++++++----- zip-stream.cabal | 1 + 4 files changed, 136 insertions(+), 42 deletions(-) diff --git a/Codec/Archive/Zip/Conduit/Internal.hs b/Codec/Archive/Zip/Conduit/Internal.hs index 1e27be3..4257a8b 100644 --- a/Codec/Archive/Zip/Conduit/Internal.hs +++ b/Codec/Archive/Zip/Conduit/Internal.hs @@ -2,7 +2,8 @@ module Codec.Archive.Zip.Conduit.Internal ( zipError , idConduit , sizeCRC - , zip64Size + , sizeC + , maxBound32 , deflateWindowBits ) where @@ -31,8 +32,11 @@ passthroughFold f z = C.await >>= maybe sizeCRC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m (Word64, Word32) sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0) -zip64Size :: Integral n => n -zip64Size = 0xffffffff +sizeC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m Word64 +sizeC = passthroughFold (\l b -> l + fromIntegral (BS.length b)) 0 -- fst <$> sizeCRC + +maxBound32 :: Integral n => n +maxBound32 = fromIntegral (maxBound :: Word32) deflateWindowBits :: WindowBits deflateWindowBits = WindowBits (-15) diff --git a/Codec/Archive/Zip/Conduit/UnZip.hs b/Codec/Archive/Zip/Conduit/UnZip.hs index 27a74ba..649bb7e 100644 --- a/Codec/Archive/Zip/Conduit/UnZip.hs +++ b/Codec/Archive/Zip/Conduit/UnZip.hs @@ -20,7 +20,7 @@ import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C import qualified Data.Conduit.List as CL import Data.Conduit.Serialization.Binary (sinkGet) -import Data.Conduit.Zlib (decompress) +import qualified Data.Conduit.Zlib as CZ import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian) import Data.Word (Word16, Word32, Word64) @@ -97,15 +97,19 @@ fromDOSTime time date = LocalTime -- Any errors are thrown in the underlying monad. unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo unZipStream = next where - next = do - h <- sinkGet header + next = do -- local header, or start central directory + h <- sinkGet $ do + sig <- G.getWord32le + case sig of + 0x04034b50 -> fileHeader + _ -> centralBody sig case h of FileHeader{..} -> do C.yield $ Left fileEntry r <- C.mapOutput Right $ case zipEntrySize fileEntry of Nothing -> do -- unknown size - ((csize, _), (size, crc)) <- C.fuseBoth sizeCRC + (csize, (size, crc)) <- C.fuseBoth sizeC $ fileDecompress C..| sizeCRC -- required data description @@ -128,11 +132,6 @@ unZipStream = next where next EndOfCentralDirectory{..} -> do return endInfo - header = do - sig <- G.getWord32le - case sig of - 0x04034b50 -> fileHeader - _ -> centralBody sig dataDesc h = -- this takes a bit of flexibility to account for the various cases (do -- with signature sig <- G.getWord32le @@ -162,7 +161,7 @@ unZipStream = next where dcomp <- case comp of 0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data" | otherwise -> return idConduit - 8 -> return $ decompress deflateWindowBits + 8 -> return $ CZ.decompress deflateWindowBits _ -> fail $ "Unsupported compression method: " ++ show comp time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le crc <- G.getWord32le @@ -177,8 +176,8 @@ unZipStream = next where G.isolate z $ case t of 0x0001 -> do -- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF" - usiz' <- if usiz == zip64Size then G.getWord64le else return $ extZip64USize ext - csiz' <- if csiz == zip64Size then G.getWord64le else return $ extZip64CSize ext + usiz' <- if usiz == maxBound32 then G.getWord64le else return $ extZip64USize ext + csiz' <- if csiz == maxBound32 then G.getWord64le else return $ extZip64CSize ext return ext { extZip64 = True , extZip64USize = usiz' diff --git a/Codec/Archive/Zip/Conduit/Zip.hs b/Codec/Archive/Zip/Conduit/Zip.hs index 99a6f5d..fea19a2 100644 --- a/Codec/Archive/Zip/Conduit/Zip.hs +++ b/Codec/Archive/Zip/Conduit/Zip.hs @@ -15,6 +15,7 @@ import Control.Monad (when) import Control.Monad.Base (MonadBase) import Control.Monad.Catch (MonadThrow) import Control.Monad.Primitive (PrimMonad) +import Control.Monad.State.Strict (StateT, get) import Control.Monad.Trans.Resource (MonadResource) import qualified Data.Binary.Put as P import Data.Bits (bit, shiftL, shiftR, (.|.)) @@ -23,11 +24,13 @@ import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Lazy as BSL import qualified Data.Conduit as C import qualified Data.Conduit.Binary as CB +import Data.Conduit.Lift (stateC, execStateC) import Data.Conduit.Serialization.Binary (sourcePut) -import Data.Conduit.Zlib (compress) +import qualified Data.Conduit.Zlib as CZ import Data.Digest.CRC32 (crc32) import Data.Either (isLeft) -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, fromJust) +import Data.Monoid ((<>)) import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian) import Data.Word (Word16, Word64) @@ -35,7 +38,7 @@ import Codec.Archive.Zip.Conduit.Types import Codec.Archive.Zip.Conduit.Internal data ZipOptions = ZipOptions - { zipOpt64 :: Bool -- ^Allow zip file sizes over 4GB (reduces compatibility, but is otherwise safe for any file sizes) + { zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases) , zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package) , zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file } @@ -49,6 +52,11 @@ defaultZipOptions = ZipOptions } } +infixr 7 ?* +(?*) :: Num a => Bool -> a -> a +True ?* x = x +False ?* _ = 0 + data ZipData m = ZipDataByteString BSL.ByteString | ZipDataSource (C.Source m BS.ByteString) @@ -79,54 +87,136 @@ toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins se , fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day ) +countBytes :: Monad m => C.ConduitM i BS.ByteString m a -> C.ConduitM i BS.ByteString (StateT Word64 m) a +countBytes c = stateC $ \s -> c `C.fuseBoth` ((s +) <$> sizeC) + +output :: MonadThrow m => P.Put -> C.ConduitM i BS.ByteString (StateT Word64 m) () +output = countBytes . sourcePut + +-- |The version of this zip program, really just rough indicator of compatibility +zipVersion :: Word16 +zipVersion = 48 + +maxBound16 :: Integral n => n +maxBound16 = fromIntegral (maxBound :: Word16) + zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64 -zipStream ZipOptions{..} = do - C.awaitForever $ C.toProducer . entry - return 0 -- TODO: size +zipStream ZipOptions{..} = execStateC 0 $ do + (cnt, cdir) <- next 0 (mempty :: P.Put) + cdoff <- get + output cdir + eoff <- get + endDirectory cdoff (eoff - cdoff) cnt where + next cnt dir = C.await >>= maybe + (return (cnt, dir)) + (\e -> do + d <- entry e + next (succ cnt) $ dir <> d) entry (ZipEntry{..}, zipData -> dat) = do let usiz = dataSize dat - sdat = left (C..| sizeCRC) dat + sdat = left ((C..| sizeCRC) . C.toProducer) dat comp = zipOptCompressLevel /= 0 && all (0 /=) usiz (cdat, csiz) | comp = - ( ((`C.fuseBoth` (compress zipOptCompressLevel deflateWindowBits C..| (fst <$> sizeCRC))) + ( ((`C.fuseBoth` (CZ.compress zipOptCompressLevel deflateWindowBits C..| sizeC)) +++ Z.compress) sdat -- level for Z.compress? , dataSize cdat) | otherwise = (left (fmap (id &&& fst)) sdat, usiz) - z64 = maybe zipOpt64 (zip64Size <) (max <$> usiz <*> csiz) + z64 = maybe zipOpt64 (maxBound32 <) (max <$> usiz <*> csiz) namelen = BS.length zipEntryName - when (namelen > fromIntegral (maxBound :: Word16)) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long" - sourcePut $ do + (time, date) = toDOSTime zipEntryTime + mcrc = either (const Nothing) (Just . crc32) cdat + when (namelen > maxBound16) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long" + let common = do + P.putWord16le $ if z64 then 45 else 20 + P.putWord16le $ isLeft dat ?* bit 3 + P.putWord16le $ comp ?* 8 + P.putWord16le $ time + P.putWord16le $ date + off <- get + output $ do P.putWord32le 0x04034b50 - P.putWord16le $ if z64 then 45 else 20 - P.putWord16le $ if isLeft dat then bit 3 else 0 - P.putWord16le $ if comp then 8 else 0 - let (time, date) = toDOSTime zipEntryTime - P.putWord16le $ time - P.putWord16le $ date - P.putWord32le $ either (const 0) crc32 cdat - P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral csiz - P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral usiz + common + P.putWord32le $ fromMaybe 0 mcrc + P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral csiz + P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz P.putWord16le $ fromIntegral namelen - P.putWord16le $ if z64 then 20 else 0 + P.putWord16le $ z64 ?* 20 P.putByteString zipEntryName when z64 $ do P.putWord16le 0x0001 P.putWord16le 16 P.putWord64le $ fromMaybe 0 usiz P.putWord64le $ fromMaybe 0 csiz - either + let outsz c = stateC $ \o -> (id &&& (o +) . snd) <$> c + ((usz, crc), csz) <- either (\cd -> do - ((usz, crc), csz) <- cd -- write compressed data - when (not z64 && (usz > zip64Size || csz > zip64Size)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled" - sourcePut $ do + r@((usz, crc), csz) <- outsz cd -- write compressed data + when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled" + output $ do P.putWord32le 0x08074b50 P.putWord32le crc let putsz | z64 = P.putWord64le | otherwise = P.putWord32le . fromIntegral putsz csz - putsz usz) - CB.sourceLbs + putsz usz + return r) + (\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b) cdat + return $ do + let o64 = off >= maxBound32 + l64 = z64 ?* 16 + o64 ?* 8 + P.putWord32le 0x02014b50 + P.putWord16le zipVersion + common + P.putWord32le crc + P.putWord32le $ if z64 then maxBound32 else fromIntegral csz + P.putWord32le $ if z64 then maxBound32 else fromIntegral usz + P.putWord16le $ fromIntegral namelen + P.putWord16le $ 4 + l64 + P.putWord16le 0 -- comment length + P.putWord16le 0 -- disk number + P.putWord16le 0 -- internal file attributes + P.putWord32le 0 -- external file attributes + P.putWord32le $ if o64 then maxBound32 else fromIntegral off + P.putByteString zipEntryName + when (z64 || o64) $ do + P.putWord16le 0x0001 + P.putWord16le l64 + when z64 $ do + P.putWord64le usz + P.putWord64le csz + when o64 $ + P.putWord64le off + endDirectory cdoff cdlen cnt = do + let z64 = zipOpt64 || cdoff > maxBound32 || cnt > maxBound16 + when z64 $ output $ do + P.putWord32le 0x06064b50 -- zip64 end + P.putWord64le 44 -- length of this record + P.putWord16le zipVersion + P.putWord16le 45 + P.putWord32le 0 -- disk + P.putWord32le 0 -- central disk + P.putWord64le cnt + P.putWord64le cnt + P.putWord64le cdlen + P.putWord64le cdoff + P.putWord32le 0x07064b50 -- locator: + P.putWord32le 0 -- central disk + P.putWord64le $ cdoff + cdlen + P.putWord32le 1 -- total disks + let comment = zipComment zipOptInfo + commlen = BS.length comment + when (commlen > maxBound16) $ zipError "comment too long" + output $ do + P.putWord32le 0x06054b50 -- end + P.putWord16le 0 -- disk + P.putWord16le 0 -- central disk + P.putWord16le $ fromIntegral $ min maxBound16 cnt + P.putWord16le $ fromIntegral $ min maxBound16 cnt + P.putWord32le $ fromIntegral $ min maxBound32 cdlen + P.putWord32le $ fromIntegral $ max maxBound32 cdoff + P.putWord16le $ fromIntegral commlen + P.putByteString comment diff --git a/zip-stream.cabal b/zip-stream.cabal index 545ed73..41e856b 100644 --- a/zip-stream.cabal +++ b/zip-stream.cabal @@ -33,6 +33,7 @@ library conduit-extra, digest, exceptions, + mtl, primitive, resourcet, time,