Allow filenames to be stored as utf8 text

Fixes #3
This commit is contained in:
Dylan Simon 2018-04-24 13:48:32 -04:00
parent d7e9b3a74f
commit 33d3a06b35
6 changed files with 29 additions and 16 deletions

View File

@ -7,6 +7,7 @@ import qualified Data.Conduit as C
import Data.Conduit.Binary (sourceLbs)
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import qualified Data.Text as T
import Data.Time.LocalTime (LocalTime)
import Data.Typeable (Typeable)
import Data.Word (Word64)
@ -29,7 +30,7 @@ data ZipInfo = ZipInfo
-- |(The beginning of) a single entry in a zip stream, which may be any file or directory.
-- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that.
data ZipEntry = ZipEntry
{ zipEntryName :: ByteString -- ^File name (in posix format, no leading slashes), usually utf-8 encoded, with a trailing slash for directories
{ zipEntryName :: Either T.Text ByteString -- ^File name (in posix format, no leading slashes), either UTF-8 encoded text or raw bytes (CP437), with a trailing slash for directories
, zipEntryTime :: LocalTime -- ^Modification time
, zipEntrySize :: Maybe Word64 -- ^Size of file data (if known); checked on zipping and also used as hint to enable zip64
} deriving (Eq, Show)

View File

@ -23,6 +23,8 @@ import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet)
import qualified Data.Conduit.Zlib as CZ
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
import Data.Word (Word16, Word32, Word64)
@ -136,7 +138,7 @@ unZipStream = next where
-- optional data description (possibly ambiguous!)
sinkGet $ (guard =<< dataDesc h) <|> return ()
return (size == usize && crc == fileCRC)
unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
unless r $ zipError $ either T.unpack BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
next
EndOfCentralDirectory{..} -> do
return endInfo
@ -166,7 +168,7 @@ unZipStream = next where
when (ver > zipVersion) $ fail $ "Unsupported version: " ++ show ver
gpf <- G.getWord16le
-- when (gpf .&. complement (bit 1 .|. bit 2 .|. bit 3) /= 0) $ fail $ "Unsupported flags: " ++ show gpf
when (gpf `clearBit` 1 `clearBit` 2 `clearBit` 3 /= 0) $ fail $ "Unsupported flags: " ++ show gpf
when (gpf `clearBit` 1 `clearBit` 2 `clearBit` 3 `clearBit` 11 /= 0) $ fail $ "Unsupported flags: " ++ show gpf
comp <- G.getWord16le
dcomp <- case comp of
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
@ -216,7 +218,7 @@ unZipStream = next where
}
return FileHeader
{ fileEntry = ZipEntry
{ zipEntryName = name
{ zipEntryName = if testBit gpf 11 then Left (TE.decodeUtf8 name) else Right name
, zipEntryTime = time
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
}

View File

@ -35,6 +35,8 @@ import qualified Data.Conduit.Zlib as CZ
import Data.Digest.CRC32 (crc32)
import Data.Either (isLeft)
import Data.Maybe (fromMaybe, fromJust)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
import Data.Word (Word16, Word64)
@ -127,12 +129,13 @@ zipStream ZipOptions{..} = execStateC 0 $ do
| otherwise = (left (fmap (id &&& fst)) sdat, usiz)
z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize)
(maxBound32 <) (max <$> usiz <*> csiz)
namelen = BS.length zipEntryName
name = either TE.encodeUtf8 id zipEntryName
namelen = BS.length name
(time, date) = toDOSTime zipEntryTime
mcrc = either (const Nothing) (Just . crc32) dat
when (namelen > maxBound16) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
when (namelen > maxBound16) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": entry name too long"
let common = do
P.putWord16le $ isLeft dat ?* bit 3
P.putWord16le $ isLeft dat ?* bit 3 .|. isLeft zipEntryName ?* bit 11
P.putWord16le $ comp ?* 8
P.putWord16le $ time
P.putWord16le $ date
@ -147,7 +150,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz
P.putWord16le $ fromIntegral namelen
P.putWord16le $ z64 ?* 20
P.putByteString zipEntryName
P.putByteString name
when z64 $ do
P.putWord16le 0x0001
P.putWord16le 16
@ -157,7 +160,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
((usz, crc), csz) <- either
(\cd -> do
r@((usz, crc), csz) <- outsz cd -- write compressed data
when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
output $ do
P.putWord32le 0x08074b50
P.putWord32le crc
@ -169,7 +172,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
return r)
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b)
cdat
when (any (usz /=) zipEntrySize) $ zipError $ BSC.unpack zipEntryName ++ ": incorrect zipEntrySize"
when (any (usz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize"
return $ do
-- central directory
let o64 = off >= maxBound32
@ -191,7 +194,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
P.putWord16le 0 -- internal file attributes
P.putWord32le 0 -- external file attributes
P.putWord32le $ if o64 then maxBound32 else fromIntegral off
P.putByteString zipEntryName
P.putByteString name
when a64 $ do
P.putWord16le 0x0001
P.putWord16le l64

View File

@ -6,6 +6,8 @@ import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Data.Time.LocalTime (localTimeToUTC, utc)
import Data.Void (Void)
import System.Directory (createDirectoryIfMissing
@ -23,9 +25,9 @@ import Codec.Archive.Zip.Conduit.UnZip
extract :: C.ConduitM (Either ZipEntry BS.ByteString) Void IO ()
extract = C.awaitForever start where
start (Left ZipEntry{..}) = do
liftIO $ BSC.putStrLn zipEntryName
liftIO $ either TIO.putStrLn BSC.putStrLn zipEntryName
liftIO $ createDirectoryIfMissing True (takeDirectory name)
if BSC.last zipEntryName == '/'
if either T.last BSC.last zipEntryName == '/'
then when ((0 /=) `any` zipEntrySize) $ fail $ name ++ ": non-empty directory"
else do -- C.bracketP
h <- liftIO $ openFile name WriteMode
@ -35,7 +37,7 @@ extract = C.awaitForever start where
#if MIN_VERSION_directory(1,2,3)
liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone
#endif
where name = BSC.unpack $ BSC.dropWhile ('/' ==) zipEntryName -- should we utf8 decode?
where name = either (T.unpack . T.dropWhile ('/' ==)) (BSC.unpack . BSC.dropWhile ('/' ==)) zipEntryName
start (Right _) = fail "Unexpected leading or directory data contents"
write = C.await >>= maybe
(return ())

View File

@ -1,4 +1,5 @@
{-# LANGUAGE CPP #-}
import Control.Arrow ((+++))
import Control.Monad (filterM, void)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.Resource (MonadResource, runResourceT)
@ -6,6 +7,7 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import Data.List (foldl')
import qualified Data.Text as T
import Data.Time.LocalTime (utcToLocalTime, utc)
import qualified System.Console.GetOpt as Opt
import System.Directory (doesDirectoryExist, getModificationTime
@ -43,7 +45,7 @@ generate :: (MonadIO m, MonadResource m) => [FilePath] -> C.ConduitM () (ZipEntr
generate (p:paths) = do
t <- liftIO $ getModificationTime p
let e = ZipEntry
{ zipEntryName = BSC.pack $ dropWhile ('/' ==) p
{ zipEntryName = Right $ BSC.pack $ dropWhile ('/' ==) p
, zipEntryTime = utcToLocalTime utc t -- FIXME: timezone
, zipEntrySize = Nothing
}
@ -62,7 +64,7 @@ generate (p:paths) = do
#else
filter (`notElem` [".",".."]) . map (p </>) <$> getDirectoryContents p
#endif
C.yield (e{ zipEntryName = zipEntryName e `BSC.snoc` '/', zipEntrySize = Just 0 }, mempty)
C.yield (e{ zipEntryName = (`T.snoc` '/') +++ (`BSC.snoc` '/') $ zipEntryName e, zipEntrySize = Just 0 }, mempty)
generate $ dl ++ paths
else do
C.yield (e, zipFileData p)

View File

@ -36,6 +36,7 @@ library
mtl,
primitive,
resourcet,
text,
time,
transformers-base,
zlib
@ -52,6 +53,7 @@ executable unzip-stream
conduit-extra,
directory,
filepath,
text,
time,
transformers,
zip-stream
@ -69,6 +71,7 @@ executable zip-stream
directory,
filepath,
resourcet,
text,
time,
transformers,
zip-stream