Start zipStream implementation
Only local headers and data so far; no central directory yet
This commit is contained in:
parent
49f413a492
commit
c460089d1a
@ -1,19 +1,26 @@
|
||||
module Codec.Archive.Zip.Conduit.Internal
|
||||
( zipError
|
||||
, idConduit
|
||||
, sizeCRC
|
||||
, zip64Size
|
||||
, deflateWindowBits
|
||||
) where
|
||||
|
||||
import Codec.Compression.Zlib.Raw (WindowBits(..))
|
||||
import Control.Monad.Catch (MonadThrow, throwM)
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Data.Conduit as C
|
||||
import Data.Digest.CRC32 (crc32Update)
|
||||
import Data.Word (Word32)
|
||||
import Data.Word (Word32, Word64)
|
||||
|
||||
import Codec.Archive.Zip.Conduit.Types
|
||||
|
||||
zipError :: MonadThrow m => String -> m a
|
||||
zipError = throwM . ZipError
|
||||
|
||||
idConduit :: Monad m => C.Conduit a m a
|
||||
idConduit = C.awaitForever C.yield
|
||||
|
||||
passthroughFold :: Monad m => (a -> b -> a) -> a -> C.ConduitM b b m a
|
||||
passthroughFold f z = C.await >>= maybe
|
||||
(return z)
|
||||
@ -21,5 +28,11 @@ passthroughFold f z = C.await >>= maybe
|
||||
C.yield x
|
||||
passthroughFold f (f z x))
|
||||
|
||||
sizeCRC :: (Monad m, Integral n) => C.ConduitM BS.ByteString BS.ByteString m (n, Word32)
|
||||
sizeCRC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m (Word64, Word32)
|
||||
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
|
||||
|
||||
zip64Size :: Integral n => n
|
||||
zip64Size = 0xffffffff
|
||||
|
||||
deflateWindowBits :: WindowBits
|
||||
deflateWindowBits = WindowBits (-15)
|
||||
|
||||
@ -5,7 +5,7 @@ module Codec.Archive.Zip.Conduit.UnZip
|
||||
( ZipEntry(..)
|
||||
, ZipInfo(..)
|
||||
, ZipError
|
||||
, unZip
|
||||
, unZipStream
|
||||
) where
|
||||
|
||||
import Control.Applicative ((<|>), empty)
|
||||
@ -20,9 +20,9 @@ import qualified Data.ByteString.Char8 as BSC
|
||||
import qualified Data.Conduit as C
|
||||
import qualified Data.Conduit.List as CL
|
||||
import Data.Conduit.Serialization.Binary (sinkGet)
|
||||
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
||||
import Data.Time (LocalTime(..), fromGregorian, TimeOfDay(..))
|
||||
import Data.Word (Word32, Word64)
|
||||
import Data.Conduit.Zlib (decompress)
|
||||
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
|
||||
import Data.Word (Word16, Word32, Word64)
|
||||
|
||||
import Codec.Archive.Zip.Conduit.Types
|
||||
import Codec.Archive.Zip.Conduit.Internal
|
||||
@ -75,6 +75,17 @@ foldGet g z = do
|
||||
e <- G.isEmpty
|
||||
if e then return z else g z >>= foldGet g
|
||||
|
||||
fromDOSTime :: Word16 -> Word16 -> LocalTime
|
||||
fromDOSTime time date = LocalTime
|
||||
(fromGregorian
|
||||
(fromIntegral $ date `shiftR` 9 + 1980)
|
||||
(fromIntegral $ date `shiftR` 5 .&. 0x0f)
|
||||
(fromIntegral $ date .&. 0x1f))
|
||||
(TimeOfDay
|
||||
(fromIntegral $ time `shiftR` 11)
|
||||
(fromIntegral $ time `shiftR` 5 .&. 0x3f)
|
||||
(fromIntegral $ time `shiftL` 1 .&. 0x3f))
|
||||
|
||||
-- |Stream a zip file, producing a sequence of entry headers and data blocks.
|
||||
-- For example, this might produce: @Left (ZipEntry "directory\/" ...), Left (ZipEntry "directory\/file.txt" ...), Right "hello w", Right "orld!\\n", Left ...@
|
||||
-- The final result is summary information taken from the end of the zip file.
|
||||
@ -84,8 +95,8 @@ foldGet g z = do
|
||||
-- It does not (ironically) support uncompressed zip files that have been created as streams, where file sizes are not known beforehand.
|
||||
-- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case.
|
||||
-- Any errors are thrown in the underlying monad.
|
||||
unZip :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
|
||||
unZip = next where
|
||||
unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
|
||||
unZipStream = next where
|
||||
next = do
|
||||
h <- sinkGet header
|
||||
case h of
|
||||
@ -150,20 +161,10 @@ unZip = next where
|
||||
comp <- G.getWord16le
|
||||
dcomp <- case comp of
|
||||
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
|
||||
| otherwise -> return $ C.awaitForever C.yield -- idConduit
|
||||
8 -> return $ decompress (WindowBits (-15))
|
||||
| otherwise -> return idConduit
|
||||
8 -> return $ decompress deflateWindowBits
|
||||
_ -> fail $ "Unsupported compression method: " ++ show comp
|
||||
time <- G.getWord16le
|
||||
date <- G.getWord16le
|
||||
let mtime = LocalTime
|
||||
(fromGregorian
|
||||
(fromIntegral $ date `shiftR` 9 + 1980)
|
||||
(fromIntegral $ date `shiftR` 5 .&. 0x0f)
|
||||
(fromIntegral $ date .&. 0x1f))
|
||||
(TimeOfDay
|
||||
(fromIntegral $ time `shiftR` 11)
|
||||
(fromIntegral $ time `shiftR` 5 .&. 0x3f)
|
||||
(fromIntegral $ time `shiftL` 1 .&. 0x3f))
|
||||
time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le
|
||||
crc <- G.getWord32le
|
||||
csiz <- G.getWord32le
|
||||
usiz <- G.getWord32le
|
||||
@ -176,8 +177,8 @@ unZip = next where
|
||||
G.isolate z $ case t of
|
||||
0x0001 -> do
|
||||
-- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF"
|
||||
usiz' <- if usiz == maxBound then G.getWord64le else return $ extZip64USize ext
|
||||
csiz' <- if csiz == maxBound then G.getWord64le else return $ extZip64CSize ext
|
||||
usiz' <- if usiz == zip64Size then G.getWord64le else return $ extZip64USize ext
|
||||
csiz' <- if csiz == zip64Size then G.getWord64le else return $ extZip64CSize ext
|
||||
return ext
|
||||
{ extZip64 = True
|
||||
, extZip64USize = usiz'
|
||||
@ -207,7 +208,7 @@ unZip = next where
|
||||
return FileHeader
|
||||
{ fileEntry = ZipEntry
|
||||
{ zipEntryName = name
|
||||
, zipEntryTime = mtime
|
||||
, zipEntryTime = time
|
||||
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
|
||||
}
|
||||
, fileDecompress = dcomp
|
||||
|
||||
132
Codec/Archive/Zip/Conduit/Zip.hs
Normal file
132
Codec/Archive/Zip/Conduit/Zip.hs
Normal file
@ -0,0 +1,132 @@
|
||||
{-# LANGUAGE RecordWildCards #-}
|
||||
{-# LANGUAGE ViewPatterns #-}
|
||||
module Codec.Archive.Zip.Conduit.Zip
|
||||
( ZipOptions(..)
|
||||
, defaultZipOptions
|
||||
, ZipEntry(..)
|
||||
, ZipData(..)
|
||||
, zipFileData
|
||||
, zipStream
|
||||
) where
|
||||
|
||||
import qualified Codec.Compression.Zlib.Raw as Z
|
||||
import Control.Arrow ((&&&), (+++), left)
|
||||
import Control.Monad (when)
|
||||
import Control.Monad.Base (MonadBase)
|
||||
import Control.Monad.Catch (MonadThrow)
|
||||
import Control.Monad.Primitive (PrimMonad)
|
||||
import Control.Monad.Trans.Resource (MonadResource)
|
||||
import qualified Data.Binary.Put as P
|
||||
import Data.Bits (bit, shiftL, shiftR, (.|.))
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Data.ByteString.Char8 as BSC
|
||||
import qualified Data.ByteString.Lazy as BSL
|
||||
import qualified Data.Conduit as C
|
||||
import qualified Data.Conduit.Binary as CB
|
||||
import Data.Conduit.Serialization.Binary (sourcePut)
|
||||
import Data.Conduit.Zlib (compress)
|
||||
import Data.Digest.CRC32 (crc32)
|
||||
import Data.Either (isLeft)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
|
||||
import Data.Word (Word16, Word64)
|
||||
|
||||
import Codec.Archive.Zip.Conduit.Types
|
||||
import Codec.Archive.Zip.Conduit.Internal
|
||||
|
||||
data ZipOptions = ZipOptions
|
||||
{ zipOpt64 :: Bool -- ^Allow zip file sizes over 4GB (reduces compatibility, but is otherwise safe for any file sizes)
|
||||
, zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package)
|
||||
, zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file
|
||||
}
|
||||
|
||||
defaultZipOptions :: ZipOptions
|
||||
defaultZipOptions = ZipOptions
|
||||
{ zipOpt64 = False
|
||||
, zipOptCompressLevel = -1
|
||||
, zipOptInfo = ZipInfo
|
||||
{ zipComment = BS.empty
|
||||
}
|
||||
}
|
||||
|
||||
data ZipData m
|
||||
= ZipDataByteString BSL.ByteString
|
||||
| ZipDataSource (C.Source m BS.ByteString)
|
||||
|
||||
instance Monad m => Monoid (ZipData m) where
|
||||
mempty = ZipDataByteString BSL.empty
|
||||
mappend (ZipDataByteString a) (ZipDataByteString b) = ZipDataByteString $ mappend a b
|
||||
mappend a b = ZipDataSource $ mappend (zipDataSource a) (zipDataSource b)
|
||||
|
||||
zipFileData :: MonadResource m => FilePath -> ZipData m
|
||||
zipFileData = ZipDataSource . CB.sourceFile
|
||||
|
||||
zipDataSource :: Monad m => ZipData m -> C.Source m BS.ByteString
|
||||
zipDataSource (ZipDataByteString b) = CB.sourceLbs b
|
||||
zipDataSource (ZipDataSource s) = s
|
||||
|
||||
zipData :: Monad m => ZipData m -> Either (C.Source m BS.ByteString) BSL.ByteString
|
||||
zipData (ZipDataByteString b) = Right b
|
||||
zipData (ZipDataSource s) = Left s
|
||||
|
||||
dataSize :: Either a BSL.ByteString -> Maybe Word64
|
||||
dataSize (Left _) = Nothing
|
||||
dataSize (Right b) = Just $ fromIntegral $ BSL.length b
|
||||
|
||||
toDOSTime :: LocalTime -> (Word16, Word16)
|
||||
toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins secs)) =
|
||||
( fromIntegral hour `shiftL` 11 .|. fromIntegral mins `shiftL` 5 .|. truncate secs `shiftR` 1
|
||||
, fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day
|
||||
)
|
||||
|
||||
zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64
|
||||
zipStream ZipOptions{..} = do
|
||||
C.awaitForever $ C.toProducer . entry
|
||||
return 0 -- TODO: size
|
||||
where
|
||||
entry (ZipEntry{..}, zipData -> dat) = do
|
||||
let usiz = dataSize dat
|
||||
sdat = left (C..| sizeCRC) dat
|
||||
comp = zipOptCompressLevel /= 0 && all (0 /=) usiz
|
||||
(cdat, csiz)
|
||||
| comp =
|
||||
( ((`C.fuseBoth` (compress zipOptCompressLevel deflateWindowBits C..| (fst <$> sizeCRC)))
|
||||
+++ Z.compress) sdat -- level for Z.compress?
|
||||
, dataSize cdat)
|
||||
| otherwise = (left (fmap (id &&& fst)) sdat, usiz)
|
||||
z64 = maybe zipOpt64 (zip64Size <) (max <$> usiz <*> csiz)
|
||||
namelen = BS.length zipEntryName
|
||||
when (namelen > fromIntegral (maxBound :: Word16)) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
|
||||
sourcePut $ do
|
||||
P.putWord32le 0x04034b50
|
||||
P.putWord16le $ if z64 then 45 else 20
|
||||
P.putWord16le $ if isLeft dat then bit 3 else 0
|
||||
P.putWord16le $ if comp then 8 else 0
|
||||
let (time, date) = toDOSTime zipEntryTime
|
||||
P.putWord16le $ time
|
||||
P.putWord16le $ date
|
||||
P.putWord32le $ either (const 0) crc32 cdat
|
||||
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral csiz
|
||||
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral usiz
|
||||
P.putWord16le $ fromIntegral namelen
|
||||
P.putWord16le $ if z64 then 20 else 0
|
||||
P.putByteString zipEntryName
|
||||
when z64 $ do
|
||||
P.putWord16le 0x0001
|
||||
P.putWord16le 16
|
||||
P.putWord64le $ fromMaybe 0 usiz
|
||||
P.putWord64le $ fromMaybe 0 csiz
|
||||
either
|
||||
(\cd -> do
|
||||
((usz, crc), csz) <- cd -- write compressed data
|
||||
when (not z64 && (usz > zip64Size || csz > zip64Size)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
|
||||
sourcePut $ do
|
||||
P.putWord32le 0x08074b50
|
||||
P.putWord32le crc
|
||||
let putsz
|
||||
| z64 = P.putWord64le
|
||||
| otherwise = P.putWord32le . fromIntegral
|
||||
putsz csz
|
||||
putsz usz)
|
||||
CB.sourceLbs
|
||||
cdat
|
||||
@ -43,5 +43,5 @@ main = do
|
||||
exitFailure
|
||||
ZipInfo{..} <- C.runConduit
|
||||
$ CB.sourceHandle stdin
|
||||
C..| C.fuseUpstream unZip extract
|
||||
C..| C.fuseUpstream unZipStream extract
|
||||
BSC.putStrLn zipComment
|
||||
|
||||
@ -19,6 +19,7 @@ library
|
||||
exposed-modules:
|
||||
Codec.Archive.Zip.Conduit.Types
|
||||
Codec.Archive.Zip.Conduit.UnZip
|
||||
Codec.Archive.Zip.Conduit.Zip
|
||||
other-modules:
|
||||
Codec.Archive.Zip.Conduit.Internal
|
||||
default-language: Haskell2010
|
||||
@ -33,8 +34,10 @@ library
|
||||
digest,
|
||||
exceptions,
|
||||
primitive,
|
||||
resourcet,
|
||||
time,
|
||||
transformers-base
|
||||
transformers-base,
|
||||
zlib
|
||||
|
||||
executable unzip-stream
|
||||
main-is: unzip.hs
|
||||
|
||||
Loading…
Reference in New Issue
Block a user