From 76c095ce7cd7fe0e963c390980e406b8b0b344d0 Mon Sep 17 00:00:00 2001 From: Dylan Simon Date: Sat, 13 May 2017 19:29:27 -0400 Subject: [PATCH] Fix and enable unzipping deflated stream data man, inputSize, huh? fuseLeftovers isn't quite good enough. --- Codec/Archive/Zip/Conduit/Internal.hs | 17 ++++++++++++++++- Codec/Archive/Zip/Conduit/UnZip.hs | 10 +++++----- Codec/Archive/Zip/Conduit/Zip.hs | 8 ++++---- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/Codec/Archive/Zip/Conduit/Internal.hs b/Codec/Archive/Zip/Conduit/Internal.hs index 1ee8387..f3c0733 100644 --- a/Codec/Archive/Zip/Conduit/Internal.hs +++ b/Codec/Archive/Zip/Conduit/Internal.hs @@ -3,7 +3,8 @@ module Codec.Archive.Zip.Conduit.Internal , zipError , idConduit , sizeCRC - , sizeC + , outputSize + , inputSize , maxBound32 , deflateWindowBits ) where @@ -12,6 +13,7 @@ import Codec.Compression.Zlib.Raw (WindowBits(..)) import Control.Monad.Catch (MonadThrow, throwM) import qualified Data.ByteString as BS import qualified Data.Conduit as C +import qualified Data.Conduit.Internal as CI import Data.Digest.CRC32 (crc32Update) import Data.Word (Word16, Word32, Word64) @@ -40,6 +42,19 @@ sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Up sizeC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m Word64 sizeC = passthroughFold (\l b -> l + fromIntegral (BS.length b)) 0 -- fst <$> sizeCRC +outputSize :: Monad m => C.Conduit i m BS.ByteString -> C.ConduitM i BS.ByteString m Word64 +outputSize = (C..| sizeC) + +inputSize :: Monad m => C.Conduit BS.ByteString m o -> C.ConduitM BS.ByteString o m Word64 +-- inputSize = fuseUpstream sizeC -- won't work because we need to deal with leftovers properly +inputSize (CI.ConduitM src) = CI.ConduitM $ \rest -> let + go n (CI.Done ()) = rest n + go n (CI.PipeM m) = CI.PipeM $ go n <$> m + go n (CI.Leftover p b) = CI.Leftover (go (n - fromIntegral (BS.length b)) p) b + go n (CI.HaveOutput p f o) = CI.HaveOutput (go n p) f o + go n (CI.NeedInput p q) = CI.NeedInput (\b -> go (n + fromIntegral (BS.length b)) (p b)) (go n . q) + in go 0 (src CI.Done) + maxBound32 :: Integral n => n maxBound32 = fromIntegral (maxBound :: Word32) diff --git a/Codec/Archive/Zip/Conduit/UnZip.hs b/Codec/Archive/Zip/Conduit/UnZip.hs index a574eda..484eba5 100644 --- a/Codec/Archive/Zip/Conduit/UnZip.hs +++ b/Codec/Archive/Zip/Conduit/UnZip.hs @@ -14,7 +14,7 @@ import Control.Monad.Base (MonadBase) import Control.Monad.Catch (MonadThrow) import Control.Monad.Primitive (PrimMonad) import qualified Data.Binary.Get as G -import Data.Bits ((.&.), complement, testBit, shiftL, shiftR) +import Data.Bits ((.&.), testBit, clearBit, shiftL, shiftR) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C @@ -109,9 +109,8 @@ unZipStream = next where r <- C.mapOutput Right $ case zipEntrySize fileEntry of Nothing -> do -- unknown size - (csize, (size, crc)) <- C.fuseBoth sizeC - $ fileDecompress - C..| sizeCRC + (csize, (size, crc)) <- inputSize fileDecompress `C.fuseBoth` sizeCRC + -- traceM $ "csize=" ++ show csize ++ " size=" ++ show size ++ " crc=" ++ show crc -- required data description sinkGet $ dataDesc h { fileCSize = csize @@ -156,7 +155,8 @@ unZipStream = next where ver <- G.getWord16le when (ver > zipVersion) $ fail $ "Unsupported version: " ++ show ver gpf <- G.getWord16le - when (gpf .&. complement 0o06 /= 0) $ fail $ "Unsupported flags: " ++ show gpf + -- when (gpf .&. complement (bit 1 .|. bit 2 .|. bit 3) /= 0) $ fail $ "Unsupported flags: " ++ show gpf + when (gpf `clearBit` 1 `clearBit` 2 `clearBit` 3 /= 0) $ fail $ "Unsupported flags: " ++ show gpf comp <- G.getWord16le dcomp <- case comp of 0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data" diff --git a/Codec/Archive/Zip/Conduit/Zip.hs b/Codec/Archive/Zip/Conduit/Zip.hs index 169e519..79f79ce 100644 --- a/Codec/Archive/Zip/Conduit/Zip.hs +++ b/Codec/Archive/Zip/Conduit/Zip.hs @@ -92,11 +92,11 @@ toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins se , fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day ) -countBytes :: Monad m => C.ConduitM i BS.ByteString m a -> C.ConduitM i BS.ByteString (StateT Word64 m) a -countBytes c = stateC $ \s -> c `C.fuseBoth` ((s +) <$> sizeC) +countOutput :: Monad m => C.Conduit i m BS.ByteString -> C.Conduit i (StateT Word64 m) BS.ByteString +countOutput c = stateC $ \s -> (,) () . (s +) <$> outputSize c output :: MonadThrow m => P.Put -> C.ConduitM i BS.ByteString (StateT Word64 m) () -output = countBytes . sourcePut +output = countOutput . sourcePut maxBound16 :: Integral n => n maxBound16 = fromIntegral (maxBound :: Word16) @@ -125,7 +125,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do comp = zipOptCompressLevel /= 0 && all (0 /=) usiz (cdat, csiz) | comp = - ( ((`C.fuseBoth` (CZ.compress zipOptCompressLevel deflateWindowBits C..| sizeC)) + ( ((`C.fuseBoth` (outputSize $ CZ.compress zipOptCompressLevel deflateWindowBits)) +++ Z.compress) sdat -- level for Z.compress? , dataSize cdat) | otherwise = (left (fmap (id &&& fst)) sdat, usiz)