Compare commits

...

2 Commits

Author SHA1 Message Date
Gregor Kleen
843683d024 throw utf8 errors 2020-09-10 12:45:54 +02:00
Gregor Kleen
094c70935f Use only one zlib-library 2020-09-08 14:45:19 +02:00
11 changed files with 117 additions and 37 deletions

View File

@ -1,5 +1,6 @@
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Codec.Archive.Zip.Conduit.Internal module Codec.Archive.Zip.Conduit.Internal
( osVersion, zipVersion ( osVersion, zipVersion
, zipError , zipError
@ -8,16 +9,21 @@ module Codec.Archive.Zip.Conduit.Internal
, outputSize , outputSize
, inputSize , inputSize
, maxBound32 , maxBound32
, deflateWindowBits , compressStream, decompressStream
) where ) where
import Codec.Compression.Zlib.Raw (WindowBits(..))
import Control.Monad.Catch (MonadThrow, throwM) import Control.Monad.Catch (MonadThrow, throwM)
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.Internal as CI import qualified Data.Conduit.Internal as CI
import Data.Digest.CRC32 (crc32Update) import Data.Digest.CRC32 (crc32Update)
import Data.Word (Word8, Word32, Word64) import Data.Word (Word8, Word32, Word64)
import qualified Codec.Compression.Zlib.Raw as Z
import qualified Codec.Compression.Zlib.Internal as Z
import Control.Monad.Primitive
import Data.Maybe (fromMaybe)
import Control.Monad.Trans.Class (MonadTrans(lift))
import qualified Control.Monad.ST.Lazy as STL
import Codec.Archive.Zip.Conduit.Types import Codec.Archive.Zip.Conduit.Types
@ -71,5 +77,33 @@ inputSize (CI.ConduitM src) = CI.ConduitM $ \rest -> let
maxBound32 :: Integral n => n maxBound32 :: Integral n => n
maxBound32 = fromIntegral (maxBound :: Word32) maxBound32 = fromIntegral (maxBound :: Word32)
deflateWindowBits :: WindowBits
deflateWindowBits = WindowBits (-15) awaitNonNull :: forall m o. Monad m => C.ConduitT BS.ByteString o m (Maybe BS.ByteString)
awaitNonNull = do
next <- C.await
case next of
Nothing -> return Nothing
Just bs
| BS.null bs -> awaitNonNull
| otherwise -> return $ Just bs
compressStream :: forall m.
PrimMonad m
=> Z.CompressParams
-> C.ConduitT BS.ByteString BS.ByteString m ()
compressStream params = C.transPipe primToPrim . go $ Z.compressST Z.rawFormat params
where
go Z.CompressStreamEnd = return ()
go (Z.CompressOutputAvailable outBS cont) = C.yield outBS >> lift cont >>= go
go (Z.CompressInputRequired cont) = awaitNonNull >>= lift . cont . fromMaybe BS.empty >>= go
decompressStream :: forall m.
( MonadThrow m, PrimMonad m )
=> C.ConduitT BS.ByteString BS.ByteString m ()
decompressStream = go $ Z.decompressST Z.rawFormat Z.defaultDecompressParams
where
go :: Z.DecompressStream (STL.ST (PrimState m)) -> C.ConduitT BS.ByteString BS.ByteString m ()
go (Z.DecompressStreamEnd unconsumed) = C.leftover unconsumed
go (Z.DecompressOutputAvailable outBS cont) = C.yield outBS >> lift (primToPrim cont) >>= go
go (Z.DecompressInputRequired cont) = awaitNonNull >>= lift . primToPrim . cont . fromMaybe BS.empty >>= go
go (Z.DecompressStreamError err) = throwM err

View File

@ -4,8 +4,7 @@ import Control.Exception (Exception(..))
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as BSL import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as C import qualified Data.Conduit as C
import Data.Conduit.Binary (sourceLbs) import qualified Data.Conduit.Combinators as C
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..)) import Data.String (IsString(..))
import qualified Data.Text as T import qualified Data.Text as T
import Data.Time.LocalTime (LocalTime) import Data.Time.LocalTime (LocalTime)
@ -51,7 +50,7 @@ instance Monad m => Monoid (ZipData m) where
-- |Normalize any 'ZipData' to a simple source -- |Normalize any 'ZipData' to a simple source
sourceZipData :: Monad m => ZipData m -> C.ConduitM () ByteString m () sourceZipData :: Monad m => ZipData m -> C.ConduitM () ByteString m ()
sourceZipData (ZipDataByteString b) = sourceLbs b sourceZipData (ZipDataByteString b) = C.sourceLazy b
sourceZipData (ZipDataSource s) = s sourceZipData (ZipDataSource s) = s
-- |Convert between unpacked (as 'Codec.Archive.Zip.Conduit.UnZip.unZipStream' produces) and packed (as 'Codec.Archive.Zip.Conduit.Zip.zipStream' consumes) representations. -- |Convert between unpacked (as 'Codec.Archive.Zip.Conduit.UnZip.unZipStream' produces) and packed (as 'Codec.Archive.Zip.Conduit.Zip.zipStream' consumes) representations.

View File

@ -22,7 +22,6 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet) import Data.Conduit.Serialization.Binary (sinkGet)
import qualified Data.Conduit.Zlib as CZ
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.Encoding as TE import qualified Data.Text.Encoding as TE
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian) import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
@ -173,7 +172,7 @@ unZipStream = next where
dcomp <- case comp of dcomp <- case comp of
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data" 0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
| otherwise -> return idConduit | otherwise -> return idConduit
8 -> return $ CZ.decompress deflateWindowBits 8 -> return decompressStream
_ -> fail $ "Unsupported compression method: " ++ show comp _ -> fail $ "Unsupported compression method: " ++ show comp
time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le
crc <- G.getWord32le crc <- G.getWord32le
@ -182,6 +181,7 @@ unZipStream = next where
nlen <- fromIntegral <$> G.getWord16le nlen <- fromIntegral <$> G.getWord16le
elen <- fromIntegral <$> G.getWord16le elen <- fromIntegral <$> G.getWord16le
name <- G.getByteString nlen name <- G.getByteString nlen
dName <- if testBit gpf 11 then Left <$> either (fail . show) return (TE.decodeUtf8' name) else return $ Right name
let getExt ext = do let getExt ext = do
t <- G.getWord16le t <- G.getWord16le
z <- fromIntegral <$> G.getWord16le z <- fromIntegral <$> G.getWord16le
@ -218,7 +218,7 @@ unZipStream = next where
} }
return FileHeader return FileHeader
{ fileEntry = ZipEntry { fileEntry = ZipEntry
{ zipEntryName = if testBit gpf 11 then Left (TE.decodeUtf8 name) else Right name { zipEntryName = dName
, zipEntryTime = time , zipEntryTime = time
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize , zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
, zipEntryExternalAttributes = Nothing , zipEntryExternalAttributes = Nothing

View File

@ -5,6 +5,8 @@
module Codec.Archive.Zip.Conduit.Zip module Codec.Archive.Zip.Conduit.Zip
( zipStream ( zipStream
, ZipOptions(..) , ZipOptions(..)
, Z.CompressionLevel
, Z.defaultCompression, Z.noCompression, Z.bestSpeed, Z.bestCompression, Z.compressionLevel
, ZipInfo(..) , ZipInfo(..)
, defaultZipOptions , defaultZipOptions
, ZipEntry(..) , ZipEntry(..)
@ -28,10 +30,9 @@ import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as BSL import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Combinators as C
import Data.Conduit.Lift (stateC, execStateC) import Data.Conduit.Lift (stateC, execStateC)
import Data.Conduit.Serialization.Binary (sourcePut) import Data.Conduit.Serialization.Binary (sourcePut)
import qualified Data.Conduit.Zlib as CZ
import Data.Digest.CRC32 (crc32) import Data.Digest.CRC32 (crc32)
import Data.Either (isLeft) import Data.Either (isLeft)
import Data.Maybe (fromMaybe, fromJust) import Data.Maybe (fromMaybe, fromJust)
@ -46,14 +47,14 @@ import Codec.Archive.Zip.Conduit.Internal
-- |Options controlling zip file parameters and features -- |Options controlling zip file parameters and features
data ZipOptions = ZipOptions data ZipOptions = ZipOptions
{ zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases); this is automatically enabled for any files of known size (e.g., 'zipEntrySize') { zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases); this is automatically enabled for any files of known size (e.g., 'zipEntrySize')
, zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package) , zipOptCompressLevel :: Z.CompressionLevel
, zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file , zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file
} }
defaultZipOptions :: ZipOptions defaultZipOptions :: ZipOptions
defaultZipOptions = ZipOptions defaultZipOptions = ZipOptions
{ zipOpt64 = False { zipOpt64 = False
, zipOptCompressLevel = -1 , zipOptCompressLevel = Z.defaultCompression
, zipOptInfo = ZipInfo , zipOptInfo = ZipInfo
{ zipComment = BS.empty { zipComment = BS.empty
} }
@ -64,9 +65,9 @@ infixr 7 ?*
True ?* x = x True ?* x = x
False ?* _ = 0 False ?* _ = 0
-- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'CB.sourceFile'@). -- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'C.sourceFile'@).
zipFileData :: MonadResource m => FilePath -> ZipData m zipFileData :: MonadResource m => FilePath -> ZipData m
zipFileData = ZipDataSource . CB.sourceFile zipFileData = ZipDataSource . C.sourceFile
zipData :: Monad m => ZipData m -> Either (C.ConduitM () BS.ByteString m ()) BSL.ByteString zipData :: Monad m => ZipData m -> Either (C.ConduitM () BS.ByteString m ()) BSL.ByteString
zipData (ZipDataByteString b) = Right b zipData (ZipDataByteString b) = Right b
@ -120,13 +121,14 @@ zipStream ZipOptions{..} = execStateC 0 $ do
entry (ZipEntry{..}, zipData -> dat) = do entry (ZipEntry{..}, zipData -> dat) = do
let usiz = dataSize dat let usiz = dataSize dat
sdat = left ((C..| sizeCRC) . C.toProducer) dat sdat = left ((C..| sizeCRC) . C.toProducer) dat
comp = zipOptCompressLevel /= 0 comp = zipOptCompressLevel /= Z.noCompression
&& all (0 /=) usiz && all (0 /=) usiz
&& all (0 /=) zipEntrySize && all (0 /=) zipEntrySize
compressParams = Z.defaultCompressParams { Z.compressLevel = zipOptCompressLevel }
(cdat, csiz) (cdat, csiz)
| comp = | comp =
( ((`C.fuseBoth` (outputSize $ CZ.compress zipOptCompressLevel deflateWindowBits)) ( ((`C.fuseBoth` (outputSize $ compressStream compressParams))
+++ Z.compress) sdat -- level for Z.compress? +++ Z.compressWith compressParams) sdat
, dataSize cdat) , dataSize cdat)
| otherwise = (left (fmap (id &&& fst)) sdat, usiz) | otherwise = (left (fmap (id &&& fst)) sdat, usiz)
z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize) z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize)
@ -172,7 +174,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
putsz csz putsz csz
putsz usz putsz usz
return r) return r)
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b) (\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ C.sourceLazy b)
cdat cdat
when (any (usz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize" when (any (usz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize"
return $ do return $ do

View File

@ -5,7 +5,7 @@ import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Combinators as C
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.IO as TIO import qualified Data.Text.IO as TIO
import Data.Time.LocalTime (localTimeToUTC, utc) import Data.Time.LocalTime (localTimeToUTC, utc)
@ -18,7 +18,7 @@ import System.Directory (createDirectoryIfMissing
import System.Environment (getProgName, getArgs) import System.Environment (getProgName, getArgs)
import System.Exit (exitFailure) import System.Exit (exitFailure)
import System.FilePath.Posix (takeDirectory) -- zip files only use forward slashes import System.FilePath.Posix (takeDirectory) -- zip files only use forward slashes
import System.IO (stdin, openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr) import System.IO (openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr)
import Codec.Archive.Zip.Conduit.UnZip import Codec.Archive.Zip.Conduit.UnZip
@ -32,7 +32,7 @@ extract = C.awaitForever start where
else do -- C.bracketP else do -- C.bracketP
h <- liftIO $ openFile name WriteMode h <- liftIO $ openFile name WriteMode
mapM_ (liftIO . hSetFileSize h . toInteger) zipEntrySize mapM_ (liftIO . hSetFileSize h . toInteger) zipEntrySize
write C..| CB.sinkHandle h write C..| C.sinkHandle h
liftIO $ hClose h liftIO $ hClose h
#if MIN_VERSION_directory(1,2,3) #if MIN_VERSION_directory(1,2,3)
liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone
@ -53,6 +53,6 @@ main = do
hPutStrLn stderr $ "Usage: " ++ prog ++ "\nRead a zip file from stdin and extract it in the current directory." hPutStrLn stderr $ "Usage: " ++ prog ++ "\nRead a zip file from stdin and extract it in the current directory."
exitFailure exitFailure
ZipInfo{..} <- C.runConduit ZipInfo{..} <- C.runConduit
$ CB.sourceHandle stdin $ C.stdin
C..| C.fuseUpstream unZipStream extract C..| C.fuseUpstream unZipStream extract
BSC.putStrLn zipComment BSC.putStrLn zipComment

View File

@ -5,7 +5,7 @@ import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.Resource (MonadResource, runResourceT) import Control.Monad.Trans.Resource (MonadResource, runResourceT)
import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Combinators as C
import Data.List (foldl') import Data.List (foldl')
import qualified Data.Text as T import qualified Data.Text as T
import Data.Time.LocalTime (utcToLocalTime, utc) import Data.Time.LocalTime (utcToLocalTime, utc)
@ -25,15 +25,15 @@ import System.Directory (doesDirectoryExist, getModificationTime
import System.Environment (getProgName, getArgs) import System.Environment (getProgName, getArgs)
import System.Exit (exitFailure) import System.Exit (exitFailure)
import System.FilePath.Posix ((</>)) -- zip files only want forward slashes import System.FilePath.Posix ((</>)) -- zip files only want forward slashes
import System.IO (stdout, hPutStrLn, stderr) import System.IO (hPutStrLn, stderr)
import Codec.Archive.Zip.Conduit.Zip import Codec.Archive.Zip.Conduit.Zip
opts :: [Opt.OptDescr (ZipOptions -> ZipOptions)] opts :: [Opt.OptDescr (ZipOptions -> ZipOptions)]
opts = opts =
[ Opt.Option "z" ["compress"] (Opt.ReqArg (\l o -> o{ zipOptCompressLevel = read l }) "LEVEL") [ Opt.Option "z" ["compress"] (Opt.ReqArg (\l o -> o{ zipOptCompressLevel = compressionLevel $ read l }) "LEVEL")
"set compression level for files (0-9)" "set compression level for files (0-9)"
, Opt.Option "0" ["store"] (Opt.NoArg (\o -> o{ zipOptCompressLevel = 0 })) , Opt.Option "0" ["store"] (Opt.NoArg (\o -> o{ zipOptCompressLevel = noCompression }))
"don't compress files (-z0)" "don't compress files (-z0)"
, Opt.Option "e" ["zip64"] (Opt.NoArg (\o -> o{ zipOpt64 = True })) , Opt.Option "e" ["zip64"] (Opt.NoArg (\o -> o{ zipOpt64 = True }))
"enable zip64 support for files over 4GB" "enable zip64 support for files over 4GB"
@ -85,4 +85,4 @@ main = do
runResourceT $ C.runConduit runResourceT $ C.runConduit
$ generate paths $ generate paths
C..| void (zipStream opt) C..| void (zipStream opt)
C..| CB.sinkHandle stdout C..| C.stdout

10
nixpkgs.nix Normal file
View File

@ -0,0 +1,10 @@
{ nixpkgs ? import <nixpkgs>
}:
import ((nixpkgs {}).fetchFromGitHub {
owner = "NixOS";
repo = "nixpkgs";
rev = "bc00ecedfa709f4fa91d445dd76ecd792cb2c728";
sha256 = "0plhwb04srr4b0h7w8qlqi207a19szz2wqz6r4gmic856jlkchaa";
fetchSubmodules = true;
})

13
stack.nix Normal file
View File

@ -0,0 +1,13 @@
{ ghc, nixpkgs ? import ./nixpkgs.nix {} }:
let
haskellPackages = pkgs.haskellPackages;
inherit (nixpkgs {}) pkgs;
in pkgs.haskell.lib.buildStackProject {
inherit ghc;
inherit (haskellPackages) stack;
name = "stackenv";
buildInputs = (with pkgs;
[ zlib
]);
}

View File

@ -1,4 +1,9 @@
resolver: lts-12.14 resolver: lts-16.13
packages: packages:
- '.' - '.'
extra-deps: [] extra-deps:
- primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604
nix:
packages: []
shell-file: ./stack.nix
add-gc-roots: true

19
stack.yaml.lock Normal file
View File

@ -0,0 +1,19 @@
# This file was autogenerated by Stack.
# You should not edit this file by hand.
# For more information, please see the documentation at:
# https://docs.haskellstack.org/en/stable/lock_files
packages:
- completed:
hackage: primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604
pantry-tree:
size: 1376
sha256: 924e88629b493abb6b2f3c3029cef076554a2b627091e3bb6887ec03487a707d
original:
hackage: primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604
snapshots:
- completed:
size: 532381
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/16/13.yaml
sha256: 6ee17f7996e5bc75ae4406250841f1362ad4196418a4d90a0615ff4f26ac98df
original: lts-16.13

View File

@ -30,14 +30,14 @@ library
binary-conduit, binary-conduit,
bytestring, bytestring,
conduit, conduit,
conduit-extra,
digest, digest,
exceptions, exceptions,
mtl, mtl,
primitive, primitive >= 0.7.1.0,
resourcet, resourcet,
text, text,
time, time,
transformers,
transformers-base, transformers-base,
zlib zlib
@ -50,7 +50,6 @@ executable unzip-stream
base >=4.8 && <5, base >=4.8 && <5,
bytestring, bytestring,
conduit, conduit,
conduit-extra,
directory, directory,
filepath, filepath,
text, text,
@ -67,7 +66,6 @@ executable zip-stream
base >=4.8 && <5, base >=4.8 && <5,
bytestring, bytestring,
conduit, conduit,
conduit-extra,
directory, directory,
filepath, filepath,
resourcet, resourcet,