Start zipStream implementation

Only local headers and data so far; no central directory yet
This commit is contained in:
Dylan Simon 2017-05-12 17:17:35 -04:00
parent 49f413a492
commit c460089d1a
5 changed files with 175 additions and 26 deletions

View File

@ -1,19 +1,26 @@
module Codec.Archive.Zip.Conduit.Internal
( zipError
, idConduit
, sizeCRC
, zip64Size
, deflateWindowBits
) where
import Codec.Compression.Zlib.Raw (WindowBits(..))
import Control.Monad.Catch (MonadThrow, throwM)
import qualified Data.ByteString as BS
import qualified Data.Conduit as C
import Data.Digest.CRC32 (crc32Update)
import Data.Word (Word32)
import Data.Word (Word32, Word64)
import Codec.Archive.Zip.Conduit.Types
zipError :: MonadThrow m => String -> m a
zipError = throwM . ZipError
idConduit :: Monad m => C.Conduit a m a
idConduit = C.awaitForever C.yield
passthroughFold :: Monad m => (a -> b -> a) -> a -> C.ConduitM b b m a
passthroughFold f z = C.await >>= maybe
(return z)
@ -21,5 +28,11 @@ passthroughFold f z = C.await >>= maybe
C.yield x
passthroughFold f (f z x))
sizeCRC :: (Monad m, Integral n) => C.ConduitM BS.ByteString BS.ByteString m (n, Word32)
sizeCRC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m (Word64, Word32)
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
zip64Size :: Integral n => n
zip64Size = 0xffffffff
deflateWindowBits :: WindowBits
deflateWindowBits = WindowBits (-15)

View File

@ -5,7 +5,7 @@ module Codec.Archive.Zip.Conduit.UnZip
( ZipEntry(..)
, ZipInfo(..)
, ZipError
, unZip
, unZipStream
) where
import Control.Applicative ((<|>), empty)
@ -20,9 +20,9 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet)
import Data.Conduit.Zlib (WindowBits(..), decompress)
import Data.Time (LocalTime(..), fromGregorian, TimeOfDay(..))
import Data.Word (Word32, Word64)
import Data.Conduit.Zlib (decompress)
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
import Data.Word (Word16, Word32, Word64)
import Codec.Archive.Zip.Conduit.Types
import Codec.Archive.Zip.Conduit.Internal
@ -75,6 +75,17 @@ foldGet g z = do
e <- G.isEmpty
if e then return z else g z >>= foldGet g
fromDOSTime :: Word16 -> Word16 -> LocalTime
fromDOSTime time date = LocalTime
(fromGregorian
(fromIntegral $ date `shiftR` 9 + 1980)
(fromIntegral $ date `shiftR` 5 .&. 0x0f)
(fromIntegral $ date .&. 0x1f))
(TimeOfDay
(fromIntegral $ time `shiftR` 11)
(fromIntegral $ time `shiftR` 5 .&. 0x3f)
(fromIntegral $ time `shiftL` 1 .&. 0x3f))
-- |Stream a zip file, producing a sequence of entry headers and data blocks.
-- For example, this might produce: @Left (ZipEntry "directory\/" ...), Left (ZipEntry "directory\/file.txt" ...), Right "hello w", Right "orld!\\n", Left ...@
-- The final result is summary information taken from the end of the zip file.
@ -84,8 +95,8 @@ foldGet g z = do
-- It does not (ironically) support uncompressed zip files that have been created as streams, where file sizes are not known beforehand.
-- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case.
-- Any errors are thrown in the underlying monad.
unZip :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
unZip = next where
unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
unZipStream = next where
next = do
h <- sinkGet header
case h of
@ -150,20 +161,10 @@ unZip = next where
comp <- G.getWord16le
dcomp <- case comp of
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
| otherwise -> return $ C.awaitForever C.yield -- idConduit
8 -> return $ decompress (WindowBits (-15))
| otherwise -> return idConduit
8 -> return $ decompress deflateWindowBits
_ -> fail $ "Unsupported compression method: " ++ show comp
time <- G.getWord16le
date <- G.getWord16le
let mtime = LocalTime
(fromGregorian
(fromIntegral $ date `shiftR` 9 + 1980)
(fromIntegral $ date `shiftR` 5 .&. 0x0f)
(fromIntegral $ date .&. 0x1f))
(TimeOfDay
(fromIntegral $ time `shiftR` 11)
(fromIntegral $ time `shiftR` 5 .&. 0x3f)
(fromIntegral $ time `shiftL` 1 .&. 0x3f))
time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le
crc <- G.getWord32le
csiz <- G.getWord32le
usiz <- G.getWord32le
@ -176,8 +177,8 @@ unZip = next where
G.isolate z $ case t of
0x0001 -> do
-- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF"
usiz' <- if usiz == maxBound then G.getWord64le else return $ extZip64USize ext
csiz' <- if csiz == maxBound then G.getWord64le else return $ extZip64CSize ext
usiz' <- if usiz == zip64Size then G.getWord64le else return $ extZip64USize ext
csiz' <- if csiz == zip64Size then G.getWord64le else return $ extZip64CSize ext
return ext
{ extZip64 = True
, extZip64USize = usiz'
@ -207,7 +208,7 @@ unZip = next where
return FileHeader
{ fileEntry = ZipEntry
{ zipEntryName = name
, zipEntryTime = mtime
, zipEntryTime = time
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
}
, fileDecompress = dcomp

View File

@ -0,0 +1,132 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ViewPatterns #-}
module Codec.Archive.Zip.Conduit.Zip
( ZipOptions(..)
, defaultZipOptions
, ZipEntry(..)
, ZipData(..)
, zipFileData
, zipStream
) where
import qualified Codec.Compression.Zlib.Raw as Z
import Control.Arrow ((&&&), (+++), left)
import Control.Monad (when)
import Control.Monad.Base (MonadBase)
import Control.Monad.Catch (MonadThrow)
import Control.Monad.Primitive (PrimMonad)
import Control.Monad.Trans.Resource (MonadResource)
import qualified Data.Binary.Put as P
import Data.Bits (bit, shiftL, shiftR, (.|.))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Serialization.Binary (sourcePut)
import Data.Conduit.Zlib (compress)
import Data.Digest.CRC32 (crc32)
import Data.Either (isLeft)
import Data.Maybe (fromMaybe)
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
import Data.Word (Word16, Word64)
import Codec.Archive.Zip.Conduit.Types
import Codec.Archive.Zip.Conduit.Internal
data ZipOptions = ZipOptions
{ zipOpt64 :: Bool -- ^Allow zip file sizes over 4GB (reduces compatibility, but is otherwise safe for any file sizes)
, zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package)
, zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file
}
defaultZipOptions :: ZipOptions
defaultZipOptions = ZipOptions
{ zipOpt64 = False
, zipOptCompressLevel = -1
, zipOptInfo = ZipInfo
{ zipComment = BS.empty
}
}
data ZipData m
= ZipDataByteString BSL.ByteString
| ZipDataSource (C.Source m BS.ByteString)
instance Monad m => Monoid (ZipData m) where
mempty = ZipDataByteString BSL.empty
mappend (ZipDataByteString a) (ZipDataByteString b) = ZipDataByteString $ mappend a b
mappend a b = ZipDataSource $ mappend (zipDataSource a) (zipDataSource b)
zipFileData :: MonadResource m => FilePath -> ZipData m
zipFileData = ZipDataSource . CB.sourceFile
zipDataSource :: Monad m => ZipData m -> C.Source m BS.ByteString
zipDataSource (ZipDataByteString b) = CB.sourceLbs b
zipDataSource (ZipDataSource s) = s
zipData :: Monad m => ZipData m -> Either (C.Source m BS.ByteString) BSL.ByteString
zipData (ZipDataByteString b) = Right b
zipData (ZipDataSource s) = Left s
dataSize :: Either a BSL.ByteString -> Maybe Word64
dataSize (Left _) = Nothing
dataSize (Right b) = Just $ fromIntegral $ BSL.length b
toDOSTime :: LocalTime -> (Word16, Word16)
toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins secs)) =
( fromIntegral hour `shiftL` 11 .|. fromIntegral mins `shiftL` 5 .|. truncate secs `shiftR` 1
, fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day
)
zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64
zipStream ZipOptions{..} = do
C.awaitForever $ C.toProducer . entry
return 0 -- TODO: size
where
entry (ZipEntry{..}, zipData -> dat) = do
let usiz = dataSize dat
sdat = left (C..| sizeCRC) dat
comp = zipOptCompressLevel /= 0 && all (0 /=) usiz
(cdat, csiz)
| comp =
( ((`C.fuseBoth` (compress zipOptCompressLevel deflateWindowBits C..| (fst <$> sizeCRC)))
+++ Z.compress) sdat -- level for Z.compress?
, dataSize cdat)
| otherwise = (left (fmap (id &&& fst)) sdat, usiz)
z64 = maybe zipOpt64 (zip64Size <) (max <$> usiz <*> csiz)
namelen = BS.length zipEntryName
when (namelen > fromIntegral (maxBound :: Word16)) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
sourcePut $ do
P.putWord32le 0x04034b50
P.putWord16le $ if z64 then 45 else 20
P.putWord16le $ if isLeft dat then bit 3 else 0
P.putWord16le $ if comp then 8 else 0
let (time, date) = toDOSTime zipEntryTime
P.putWord16le $ time
P.putWord16le $ date
P.putWord32le $ either (const 0) crc32 cdat
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral csiz
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral usiz
P.putWord16le $ fromIntegral namelen
P.putWord16le $ if z64 then 20 else 0
P.putByteString zipEntryName
when z64 $ do
P.putWord16le 0x0001
P.putWord16le 16
P.putWord64le $ fromMaybe 0 usiz
P.putWord64le $ fromMaybe 0 csiz
either
(\cd -> do
((usz, crc), csz) <- cd -- write compressed data
when (not z64 && (usz > zip64Size || csz > zip64Size)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
sourcePut $ do
P.putWord32le 0x08074b50
P.putWord32le crc
let putsz
| z64 = P.putWord64le
| otherwise = P.putWord32le . fromIntegral
putsz csz
putsz usz)
CB.sourceLbs
cdat

View File

@ -43,5 +43,5 @@ main = do
exitFailure
ZipInfo{..} <- C.runConduit
$ CB.sourceHandle stdin
C..| C.fuseUpstream unZip extract
C..| C.fuseUpstream unZipStream extract
BSC.putStrLn zipComment

View File

@ -19,6 +19,7 @@ library
exposed-modules:
Codec.Archive.Zip.Conduit.Types
Codec.Archive.Zip.Conduit.UnZip
Codec.Archive.Zip.Conduit.Zip
other-modules:
Codec.Archive.Zip.Conduit.Internal
default-language: Haskell2010
@ -33,8 +34,10 @@ library
digest,
exceptions,
primitive,
resourcet,
time,
transformers-base
transformers-base,
zlib
executable unzip-stream
main-is: unzip.hs