Compare commits

...

2 Commits

Author SHA1 Message Date
Gregor Kleen
843683d024 throw utf8 errors 2020-09-10 12:45:54 +02:00
Gregor Kleen
094c70935f Use only one zlib-library 2020-09-08 14:45:19 +02:00
11 changed files with 117 additions and 37 deletions

View File

@ -1,5 +1,6 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Codec.Archive.Zip.Conduit.Internal
( osVersion, zipVersion
, zipError
@ -8,16 +9,21 @@ module Codec.Archive.Zip.Conduit.Internal
, outputSize
, inputSize
, maxBound32
, deflateWindowBits
, compressStream, decompressStream
) where
import Codec.Compression.Zlib.Raw (WindowBits(..))
import Control.Monad.Catch (MonadThrow, throwM)
import qualified Data.ByteString as BS
import qualified Data.Conduit as C
import qualified Data.Conduit.Internal as CI
import Data.Digest.CRC32 (crc32Update)
import Data.Word (Word8, Word32, Word64)
import qualified Codec.Compression.Zlib.Raw as Z
import qualified Codec.Compression.Zlib.Internal as Z
import Control.Monad.Primitive
import Data.Maybe (fromMaybe)
import Control.Monad.Trans.Class (MonadTrans(lift))
import qualified Control.Monad.ST.Lazy as STL
import Codec.Archive.Zip.Conduit.Types
@ -71,5 +77,33 @@ inputSize (CI.ConduitM src) = CI.ConduitM $ \rest -> let
maxBound32 :: Integral n => n
maxBound32 = fromIntegral (maxBound :: Word32)
deflateWindowBits :: WindowBits
deflateWindowBits = WindowBits (-15)
awaitNonNull :: forall m o. Monad m => C.ConduitT BS.ByteString o m (Maybe BS.ByteString)
awaitNonNull = do
next <- C.await
case next of
Nothing -> return Nothing
Just bs
| BS.null bs -> awaitNonNull
| otherwise -> return $ Just bs
compressStream :: forall m.
PrimMonad m
=> Z.CompressParams
-> C.ConduitT BS.ByteString BS.ByteString m ()
compressStream params = C.transPipe primToPrim . go $ Z.compressST Z.rawFormat params
where
go Z.CompressStreamEnd = return ()
go (Z.CompressOutputAvailable outBS cont) = C.yield outBS >> lift cont >>= go
go (Z.CompressInputRequired cont) = awaitNonNull >>= lift . cont . fromMaybe BS.empty >>= go
decompressStream :: forall m.
( MonadThrow m, PrimMonad m )
=> C.ConduitT BS.ByteString BS.ByteString m ()
decompressStream = go $ Z.decompressST Z.rawFormat Z.defaultDecompressParams
where
go :: Z.DecompressStream (STL.ST (PrimState m)) -> C.ConduitT BS.ByteString BS.ByteString m ()
go (Z.DecompressStreamEnd unconsumed) = C.leftover unconsumed
go (Z.DecompressOutputAvailable outBS cont) = C.yield outBS >> lift (primToPrim cont) >>= go
go (Z.DecompressInputRequired cont) = awaitNonNull >>= lift . primToPrim . cont . fromMaybe BS.empty >>= go
go (Z.DecompressStreamError err) = throwM err

View File

@ -4,8 +4,7 @@ import Control.Exception (Exception(..))
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as C
import Data.Conduit.Binary (sourceLbs)
import Data.Semigroup (Semigroup(..))
import qualified Data.Conduit.Combinators as C
import Data.String (IsString(..))
import qualified Data.Text as T
import Data.Time.LocalTime (LocalTime)
@ -51,7 +50,7 @@ instance Monad m => Monoid (ZipData m) where
-- |Normalize any 'ZipData' to a simple source
sourceZipData :: Monad m => ZipData m -> C.ConduitM () ByteString m ()
sourceZipData (ZipDataByteString b) = sourceLbs b
sourceZipData (ZipDataByteString b) = C.sourceLazy b
sourceZipData (ZipDataSource s) = s
-- |Convert between unpacked (as 'Codec.Archive.Zip.Conduit.UnZip.unZipStream' produces) and packed (as 'Codec.Archive.Zip.Conduit.Zip.zipStream' consumes) representations.

View File

@ -22,7 +22,6 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet)
import qualified Data.Conduit.Zlib as CZ
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
@ -173,7 +172,7 @@ unZipStream = next where
dcomp <- case comp of
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
| otherwise -> return idConduit
8 -> return $ CZ.decompress deflateWindowBits
8 -> return decompressStream
_ -> fail $ "Unsupported compression method: " ++ show comp
time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le
crc <- G.getWord32le
@ -182,6 +181,7 @@ unZipStream = next where
nlen <- fromIntegral <$> G.getWord16le
elen <- fromIntegral <$> G.getWord16le
name <- G.getByteString nlen
dName <- if testBit gpf 11 then Left <$> either (fail . show) return (TE.decodeUtf8' name) else return $ Right name
let getExt ext = do
t <- G.getWord16le
z <- fromIntegral <$> G.getWord16le
@ -218,7 +218,7 @@ unZipStream = next where
}
return FileHeader
{ fileEntry = ZipEntry
{ zipEntryName = if testBit gpf 11 then Left (TE.decodeUtf8 name) else Right name
{ zipEntryName = dName
, zipEntryTime = time
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
, zipEntryExternalAttributes = Nothing

View File

@ -5,6 +5,8 @@
module Codec.Archive.Zip.Conduit.Zip
( zipStream
, ZipOptions(..)
, Z.CompressionLevel
, Z.defaultCompression, Z.noCompression, Z.bestSpeed, Z.bestCompression, Z.compressionLevel
, ZipInfo(..)
, defaultZipOptions
, ZipEntry(..)
@ -28,10 +30,9 @@ import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import Data.Conduit.Lift (stateC, execStateC)
import Data.Conduit.Serialization.Binary (sourcePut)
import qualified Data.Conduit.Zlib as CZ
import Data.Digest.CRC32 (crc32)
import Data.Either (isLeft)
import Data.Maybe (fromMaybe, fromJust)
@ -46,14 +47,14 @@ import Codec.Archive.Zip.Conduit.Internal
-- |Options controlling zip file parameters and features
data ZipOptions = ZipOptions
{ zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases); this is automatically enabled for any files of known size (e.g., 'zipEntrySize')
, zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package)
, zipOptCompressLevel :: Z.CompressionLevel
, zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file
}
defaultZipOptions :: ZipOptions
defaultZipOptions = ZipOptions
{ zipOpt64 = False
, zipOptCompressLevel = -1
, zipOptCompressLevel = Z.defaultCompression
, zipOptInfo = ZipInfo
{ zipComment = BS.empty
}
@ -64,9 +65,9 @@ infixr 7 ?*
True ?* x = x
False ?* _ = 0
-- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'CB.sourceFile'@).
-- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'C.sourceFile'@).
zipFileData :: MonadResource m => FilePath -> ZipData m
zipFileData = ZipDataSource . CB.sourceFile
zipFileData = ZipDataSource . C.sourceFile
zipData :: Monad m => ZipData m -> Either (C.ConduitM () BS.ByteString m ()) BSL.ByteString
zipData (ZipDataByteString b) = Right b
@ -120,13 +121,14 @@ zipStream ZipOptions{..} = execStateC 0 $ do
entry (ZipEntry{..}, zipData -> dat) = do
let usiz = dataSize dat
sdat = left ((C..| sizeCRC) . C.toProducer) dat
comp = zipOptCompressLevel /= 0
comp = zipOptCompressLevel /= Z.noCompression
&& all (0 /=) usiz
&& all (0 /=) zipEntrySize
compressParams = Z.defaultCompressParams { Z.compressLevel = zipOptCompressLevel }
(cdat, csiz)
| comp =
( ((`C.fuseBoth` (outputSize $ CZ.compress zipOptCompressLevel deflateWindowBits))
+++ Z.compress) sdat -- level for Z.compress?
( ((`C.fuseBoth` (outputSize $ compressStream compressParams))
+++ Z.compressWith compressParams) sdat
, dataSize cdat)
| otherwise = (left (fmap (id &&& fst)) sdat, usiz)
z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize)
@ -172,7 +174,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
putsz csz
putsz usz
return r)
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b)
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ C.sourceLazy b)
cdat
when (any (usz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize"
return $ do

View File

@ -5,7 +5,7 @@ import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Data.Time.LocalTime (localTimeToUTC, utc)
@ -18,7 +18,7 @@ import System.Directory (createDirectoryIfMissing
import System.Environment (getProgName, getArgs)
import System.Exit (exitFailure)
import System.FilePath.Posix (takeDirectory) -- zip files only use forward slashes
import System.IO (stdin, openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr)
import System.IO (openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr)
import Codec.Archive.Zip.Conduit.UnZip
@ -32,7 +32,7 @@ extract = C.awaitForever start where
else do -- C.bracketP
h <- liftIO $ openFile name WriteMode
mapM_ (liftIO . hSetFileSize h . toInteger) zipEntrySize
write C..| CB.sinkHandle h
write C..| C.sinkHandle h
liftIO $ hClose h
#if MIN_VERSION_directory(1,2,3)
liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone
@ -53,6 +53,6 @@ main = do
hPutStrLn stderr $ "Usage: " ++ prog ++ "\nRead a zip file from stdin and extract it in the current directory."
exitFailure
ZipInfo{..} <- C.runConduit
$ CB.sourceHandle stdin
$ C.stdin
C..| C.fuseUpstream unZipStream extract
BSC.putStrLn zipComment

View File

@ -5,7 +5,7 @@ import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.Resource (MonadResource, runResourceT)
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import Data.List (foldl')
import qualified Data.Text as T
import Data.Time.LocalTime (utcToLocalTime, utc)
@ -25,15 +25,15 @@ import System.Directory (doesDirectoryExist, getModificationTime
import System.Environment (getProgName, getArgs)
import System.Exit (exitFailure)
import System.FilePath.Posix ((</>)) -- zip files only want forward slashes
import System.IO (stdout, hPutStrLn, stderr)
import System.IO (hPutStrLn, stderr)
import Codec.Archive.Zip.Conduit.Zip
opts :: [Opt.OptDescr (ZipOptions -> ZipOptions)]
opts =
[ Opt.Option "z" ["compress"] (Opt.ReqArg (\l o -> o{ zipOptCompressLevel = read l }) "LEVEL")
[ Opt.Option "z" ["compress"] (Opt.ReqArg (\l o -> o{ zipOptCompressLevel = compressionLevel $ read l }) "LEVEL")
"set compression level for files (0-9)"
, Opt.Option "0" ["store"] (Opt.NoArg (\o -> o{ zipOptCompressLevel = 0 }))
, Opt.Option "0" ["store"] (Opt.NoArg (\o -> o{ zipOptCompressLevel = noCompression }))
"don't compress files (-z0)"
, Opt.Option "e" ["zip64"] (Opt.NoArg (\o -> o{ zipOpt64 = True }))
"enable zip64 support for files over 4GB"
@ -85,4 +85,4 @@ main = do
runResourceT $ C.runConduit
$ generate paths
C..| void (zipStream opt)
C..| CB.sinkHandle stdout
C..| C.stdout

10
nixpkgs.nix Normal file
View File

@ -0,0 +1,10 @@
{ nixpkgs ? import <nixpkgs>
}:
import ((nixpkgs {}).fetchFromGitHub {
owner = "NixOS";
repo = "nixpkgs";
rev = "bc00ecedfa709f4fa91d445dd76ecd792cb2c728";
sha256 = "0plhwb04srr4b0h7w8qlqi207a19szz2wqz6r4gmic856jlkchaa";
fetchSubmodules = true;
})

13
stack.nix Normal file
View File

@ -0,0 +1,13 @@
{ ghc, nixpkgs ? import ./nixpkgs.nix {} }:
let
haskellPackages = pkgs.haskellPackages;
inherit (nixpkgs {}) pkgs;
in pkgs.haskell.lib.buildStackProject {
inherit ghc;
inherit (haskellPackages) stack;
name = "stackenv";
buildInputs = (with pkgs;
[ zlib
]);
}

View File

@ -1,4 +1,9 @@
resolver: lts-12.14
resolver: lts-16.13
packages:
- '.'
extra-deps: []
- '.'
extra-deps:
- primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604
nix:
packages: []
shell-file: ./stack.nix
add-gc-roots: true

19
stack.yaml.lock Normal file
View File

@ -0,0 +1,19 @@
# This file was autogenerated by Stack.
# You should not edit this file by hand.
# For more information, please see the documentation at:
# https://docs.haskellstack.org/en/stable/lock_files
packages:
- completed:
hackage: primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604
pantry-tree:
size: 1376
sha256: 924e88629b493abb6b2f3c3029cef076554a2b627091e3bb6887ec03487a707d
original:
hackage: primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604
snapshots:
- completed:
size: 532381
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/16/13.yaml
sha256: 6ee17f7996e5bc75ae4406250841f1362ad4196418a4d90a0615ff4f26ac98df
original: lts-16.13

View File

@ -30,14 +30,14 @@ library
binary-conduit,
bytestring,
conduit,
conduit-extra,
digest,
exceptions,
mtl,
primitive,
primitive >= 0.7.1.0,
resourcet,
text,
time,
transformers,
transformers-base,
zlib
@ -50,7 +50,6 @@ executable unzip-stream
base >=4.8 && <5,
bytestring,
conduit,
conduit-extra,
directory,
filepath,
text,
@ -67,7 +66,6 @@ executable zip-stream
base >=4.8 && <5,
bytestring,
conduit,
conduit-extra,
directory,
filepath,
resourcet,