Generalize beyond IO monad
This commit is contained in:
parent
05a04383f8
commit
fed591ecfb
@ -1,13 +1,19 @@
|
|||||||
|
-- |Stream the extraction of a zip file, e.g., as it's being downloaded.
|
||||||
{-# LANGUAGE RecordWildCards #-}
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
{-# LANGUAGE RankNTypes #-}
|
{-# LANGUAGE RankNTypes #-}
|
||||||
module Codec.Archive.Zip.Conduit.UnZip
|
module Codec.Archive.Zip.Conduit.UnZip
|
||||||
( ZipEntry(..)
|
( ZipEntry(..)
|
||||||
, ZipInfo(..)
|
, ZipInfo(..)
|
||||||
|
, ZipError
|
||||||
, unZip
|
, unZip
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Applicative ((<|>), empty)
|
import Control.Applicative ((<|>), empty)
|
||||||
|
import Control.Exception (Exception(..))
|
||||||
import Control.Monad (when, unless, guard)
|
import Control.Monad (when, unless, guard)
|
||||||
|
import Control.Monad.Base (MonadBase)
|
||||||
|
import Control.Monad.Catch (MonadThrow, throwM)
|
||||||
|
import Control.Monad.Primitive (PrimMonad)
|
||||||
import qualified Data.Binary.Get as G
|
import qualified Data.Binary.Get as G
|
||||||
import Data.Bits ((.&.), complement, testBit, shiftL, shiftR)
|
import Data.Bits ((.&.), complement, testBit, shiftL, shiftR)
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
@ -17,18 +23,29 @@ import Data.Conduit.Serialization.Binary (sinkGet)
|
|||||||
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
||||||
import Data.Digest.CRC32 (crc32Update)
|
import Data.Digest.CRC32 (crc32Update)
|
||||||
import Data.Time (UTCTime(..), fromGregorian, timeOfDayToTime, TimeOfDay(..))
|
import Data.Time (UTCTime(..), fromGregorian, timeOfDayToTime, TimeOfDay(..))
|
||||||
|
import Data.Typeable (Typeable)
|
||||||
import Data.Word (Word32, Word64)
|
import Data.Word (Word32, Word64)
|
||||||
|
|
||||||
|
-- |(The beginning of) a single entry in a zip stream, which may be any file or directory.
|
||||||
|
-- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that.
|
||||||
data ZipEntry = ZipEntry
|
data ZipEntry = ZipEntry
|
||||||
{ zipEntryName :: BS.ByteString
|
{ zipEntryName :: BS.ByteString -- ^File name, usually utf-8 encoded, with a trailing slash for directories
|
||||||
, zipEntryTime :: UTCTime
|
, zipEntryTime :: UTCTime -- ^Modification time
|
||||||
, zipEntrySize :: !Word64
|
, zipEntrySize :: !Word64 -- ^Size of file data (not checked)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- |Summary information at the end of a zip stream.
|
||||||
data ZipInfo = ZipInfo
|
data ZipInfo = ZipInfo
|
||||||
{ zipComment :: BS.ByteString
|
{ zipComment :: BS.ByteString
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- |Errors thrown during zip file processing
|
||||||
|
newtype ZipError = ZipError String
|
||||||
|
deriving (Show, Typeable)
|
||||||
|
|
||||||
|
instance Exception ZipError where
|
||||||
|
displayException (ZipError e) = "ZipError: " ++ e
|
||||||
|
|
||||||
data Header m
|
data Header m
|
||||||
= FileHeader
|
= FileHeader
|
||||||
{ fileDecompress :: C.Conduit BS.ByteString m BS.ByteString
|
{ fileDecompress :: C.Conduit BS.ByteString m BS.ByteString
|
||||||
@ -56,10 +73,13 @@ data ExtField = ExtField
|
|||||||
}
|
}
|
||||||
-}
|
-}
|
||||||
|
|
||||||
pass :: (Monad m, Integral n) => n -> C.Conduit BS.ByteString m BS.ByteString
|
zipError :: MonadThrow m => String -> m a
|
||||||
|
zipError = throwM . ZipError
|
||||||
|
|
||||||
|
pass :: (MonadThrow m, Integral n) => n -> C.Conduit BS.ByteString m BS.ByteString
|
||||||
pass 0 = return ()
|
pass 0 = return ()
|
||||||
pass n = C.await >>= maybe
|
pass n = C.await >>= maybe
|
||||||
(fail $ "EOF in file data, expecting " ++ show ni ++ " more bytes")
|
(zipError $ "EOF in file data, expecting " ++ show ni ++ " more bytes")
|
||||||
(\b ->
|
(\b ->
|
||||||
let n' = ni - toInteger (BS.length b) in
|
let n' = ni - toInteger (BS.length b) in
|
||||||
if n' < 0
|
if n' < 0
|
||||||
@ -75,15 +95,24 @@ pass n = C.await >>= maybe
|
|||||||
crc32 :: Monad m => C.Consumer BS.ByteString m Word32
|
crc32 :: Monad m => C.Consumer BS.ByteString m Word32
|
||||||
crc32 = CL.fold crc32Update 0
|
crc32 = CL.fold crc32Update 0
|
||||||
|
|
||||||
checkCRC :: Monad m => Word32 -> C.Conduit BS.ByteString m BS.ByteString
|
checkCRC :: MonadThrow m => Word32 -> C.Conduit BS.ByteString m BS.ByteString
|
||||||
checkCRC t = C.passthroughSink crc32 $ \r -> unless (r == t) $ fail "CRC32 mismatch"
|
checkCRC t = C.passthroughSink crc32 $ \r -> unless (r == t) $ zipError "CRC32 mismatch"
|
||||||
|
|
||||||
foldGet :: (a -> G.Get a) -> a -> G.Get a
|
foldGet :: (a -> G.Get a) -> a -> G.Get a
|
||||||
foldGet g z = do
|
foldGet g z = do
|
||||||
e <- G.isEmpty
|
e <- G.isEmpty
|
||||||
if e then return z else g z >>= foldGet g
|
if e then return z else g z >>= foldGet g
|
||||||
|
|
||||||
unZip :: C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) IO ZipInfo
|
-- |Stream a zip file, producing a sequence of entry headers and data blocks.
|
||||||
|
-- For example, this might produce: @Left (ZipEntry "directory/" ...), Left (ZipEntry "directory/file.txt" ...), Right "hello w", Right "orld!\n", Left ...@
|
||||||
|
-- The final result is summary information taken from the end of the zip file.
|
||||||
|
-- No state in maintained during processing, and in particular, any file-specific information in the final central directory is discarded.
|
||||||
|
--
|
||||||
|
-- This only supports a limited number of zip file features, including deflate compression and zip64.
|
||||||
|
-- It does not support streaming zip files, where file sizes are not known beforehand (though this could potentially be fixed for some cases).
|
||||||
|
-- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case.
|
||||||
|
-- Any errors are thrown in the underlying monad.
|
||||||
|
unZip :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
|
||||||
unZip = next where
|
unZip = next where
|
||||||
next = do
|
next = do
|
||||||
h <- sinkGet header
|
h <- sinkGet header
|
||||||
|
|||||||
@ -21,11 +21,14 @@ library
|
|||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
ghc-options: -Wall
|
ghc-options: -Wall
|
||||||
build-depends:
|
build-depends:
|
||||||
base >= 4.7 && < 5,
|
base >= 4.8 && < 5,
|
||||||
binary >= 0.7.2,
|
binary >= 0.7.2,
|
||||||
binary-conduit,
|
binary-conduit,
|
||||||
bytestring,
|
bytestring,
|
||||||
conduit,
|
conduit,
|
||||||
conduit-extra,
|
conduit-extra,
|
||||||
digest,
|
digest,
|
||||||
time
|
exceptions,
|
||||||
|
primitive,
|
||||||
|
time,
|
||||||
|
transformers-base
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user