#!/usr/bin/env python
#
# Copyright (C) 2019 GNS3 Technologies Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Iterable ZIP archive generator.
Derived directly from zipfile.py and the zipstream project
https://github.com/allanlei/python-zipstream
"""
import os
import sys
import stat
import struct
import time
import zipfile
import asyncio
import aiofiles
from concurrent import futures
2021-04-13 18:46:50 +09:30
from zipfile import (
structCentralDir,
structEndArchive64,
structEndArchive,
structEndArchive64Locator,
stringCentralDir,
stringEndArchive64,
stringEndArchive,
stringEndArchive64Locator,
)
2019-02-26 15:55:07 +07:00
2021-04-13 18:46:50 +09:30
stringDataDescriptor = b"PK\x07\x08"  # magic number (signature) packed at the start of every data descriptor record
2019-02-26 15:55:07 +07:00
def _get_compressor(compress_type):
"""
Return the compressor.
"""
if compress_type == zipfile.ZIP_DEFLATED:
from zipfile import zlib
2021-04-13 18:46:50 +09:30
2019-02-26 15:55:07 +07:00
return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
elif compress_type == zipfile.ZIP_BZIP2:
from zipfile import bz2
2021-04-13 18:46:50 +09:30
2019-02-26 15:55:07 +07:00
return bz2.BZ2Compressor()
elif compress_type == zipfile.ZIP_LZMA:
from zipfile import LZMACompressor
2021-04-13 18:46:50 +09:30
2019-02-26 15:55:07 +07:00
return LZMACompressor()
else:
return None
2021-04-13 18:37:58 +09:30
class PointerIO:
    """
    Write-only pseudo file that stores nothing and only tracks a position.

    write() hands the (encoded) data straight back to the caller and advances
    `data_pointer` by its length, so tell() reports the number of bytes that
    have been passed through so far.
    """

    def __init__(self, mode="wb"):
        """
        :param mode: must be "wb"
        :raises RuntimeError: for any other mode
        """
        if mode not in ("wb",):
            raise RuntimeError('zipstream.ZipFile() requires mode "wb"')
        self.data_pointer = 0
        self.__mode = mode
        self.__closed = False

    @property
    def mode(self):
        """The mode given at construction time."""
        return self.__mode

    @property
    def closed(self):
        """True once close() has been called."""
        return self.__closed

    def close(self):
        """Mark the object as closed; later write() calls raise ValueError."""
        self.__closed = True

    def flush(self):
        """Nothing is buffered, so there is nothing to flush."""
        pass

    def next(self):
        """Reading is not supported."""
        raise NotImplementedError()

    def tell(self):
        """Return the total number of bytes passed to write() so far."""
        return self.data_pointer

    def truncate(self, size=None):
        """
        Truncating is not supported.

        `self` was missing from the signature, which made ``obj.truncate(n)``
        fail with TypeError instead of NotImplementedError.
        """
        raise NotImplementedError()

    def write(self, data):
        """
        Account for `data` and return it as bytes.

        :param data: bytes, or str (encoded as UTF-8)
        :returns: the bytes that were accounted for
        :raises ValueError: if the object is closed
        :raises TypeError: if data is neither bytes nor str
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(data, bytes):
            raise TypeError("expected bytes")
        self.data_pointer += len(data)
        return data
class ZipInfo(zipfile.ZipInfo):
    """zipfile.ZipInfo extended with a packer for the data descriptor record."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def DataDescriptor(self):
        """
        Pack the data descriptor record for this entry:

        signature           4 bytes
        crc-32              4 bytes
        compressed size     4 bytes
        uncompressed size   4 bytes

        Both size fields are packed as 8 bytes instead when either size
        exceeds zipfile.ZIP64_LIMIT.
        """
        largest = max(self.compress_size, self.file_size)
        size_code = b"Q" if largest > zipfile.ZIP64_LIMIT else b"L"
        layout = b"<4sL" + size_code * 2
        return struct.pack(layout, stringDataDescriptor, self.CRC, self.compress_size, self.file_size)
class ZipFile(zipfile.ZipFile):
    """
    Write-only ZIP archive that is generated lazily as an asynchronous stream.

    Entries are queued with write(), write_iter() or writestr(). Nothing is
    read or compressed until the object is iterated with ``async for``, which
    yields whatever ``fp.write()`` returns for each piece of the archive
    (the bytes themselves when the default PointerIO is used).
    """

    # Single worker thread used for compression; created on first use and
    # released when the stream ends.
    _executor = None

    def __init__(self, fileobj=None, mode="w", compression=zipfile.ZIP_STORED, allowZip64=True, chunksize=32768):
        """
        Open the ZIP file with mode write "w".

        :param fileobj: object providing write(), tell() and flush();
                        a PointerIO is created when None
        :param mode: must be "w"
        :param compression: default compression method for queued entries
        :param allowZip64: forwarded to zipfile.ZipFile
        :param chunksize: maximum number of bytes read from disk at a time
        :raises RuntimeError: if mode is not "w"
        """
        if mode not in ("w",):
            raise RuntimeError('aiozipstream.ZipFile() requires mode "w"')
        if fileobj is None:
            fileobj = PointerIO()
        self._comment = b""
        zipfile.ZipFile.__init__(self, fileobj, mode=mode, compression=compression, allowZip64=allowZip64)
        self._chunksize = chunksize
        self.paths_to_write = []

    def __aiter__(self):
        """Return the asynchronous generator that produces the archive."""
        return self._stream()

    @property
    def comment(self):
        """
        The comment text associated with the ZIP file.
        """
        return self._comment

    @comment.setter
    def comment(self, comment):
        """
        Add a comment text associated with the ZIP file.

        :raises TypeError: if comment is not bytes
        """
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment))
        # Only report when the comment is really cut; a comment of exactly
        # ZIP_MAX_COMMENT bytes fits and is stored unchanged.
        if len(comment) > zipfile.ZIP_MAX_COMMENT:
            if self.debug:
                print("Archive comment is too long; truncating to %d bytes" % zipfile.ZIP_MAX_COMMENT)
            comment = comment[: zipfile.ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    async def data_generator(self, path):
        """
        Asynchronously yield the content of the file at `path` in chunks of
        at most `chunksize` bytes.
        """
        async with aiofiles.open(path, "rb") as f:
            while True:
                part = await f.read(self._chunksize)
                if not part:
                    break
                yield part

    async def _run_in_executor(self, task, *args, **kwargs):
        """
        Run synchronous task in separate thread and await for result.

        One worker thread is shared by all calls on this archive instead of
        starting (and never shutting down) a new executor for every call.
        """
        loop = asyncio.get_event_loop()
        if self._executor is None:
            self._executor = futures.ThreadPoolExecutor(max_workers=1)
        # run_in_executor() forwards positional arguments only, so the call
        # is wrapped to keep keyword arguments working.
        return await loop.run_in_executor(self._executor, lambda: task(*args, **kwargs))

    def _shutdown_executor(self):
        """Release the compression worker thread, if one was started."""
        executor, self._executor = self._executor, None
        if executor is not None:
            executor.shutdown(wait=False)

    async def _stream(self):
        """
        Yield every queued entry followed by the archive's ending records.
        """
        try:
            for kwargs in self.paths_to_write:
                async for chunk in self._write(**kwargs):
                    yield chunk
            for chunk in self._close():
                yield chunk
        finally:
            self._shutdown_executor()

    def write(self, filename, arcname=None, compress_type=None):
        """
        Write a file to the archive under the name `arcname`.

        The file is only queued here; it is read when the archive is streamed.
        """
        kwargs = {"filename": filename, "arcname": arcname, "compress_type": compress_type}
        self.paths_to_write.append(kwargs)

    def write_iter(self, arcname, iterable, compress_type=None):
        """
        Write the bytes iterable `iterable` to the archive under the name `arcname`.

        The iterable is only queued here; it is consumed when the archive is streamed.
        """
        kwargs = {"arcname": arcname, "iterable": iterable, "compress_type": compress_type}
        self.paths_to_write.append(kwargs)

    def writestr(self, arcname, data, compress_type=None):
        """
        Writes a str into ZipFile by wrapping data as a generator

        `data` may be bytes or str; str is encoded as UTF-8 when the entry is
        streamed.
        """

        def _iterable():
            yield data

        return self.write_iter(arcname, _iterable(), compress_type=compress_type)

    async def _read_chunks(self, filename, iterable):
        """Yield the uncompressed chunks of one entry, from disk or from `iterable`."""
        if filename is not None:
            async for part in self.data_generator(filename):
                yield part
        else:
            for part in iterable:
                # crc32 and the compressors only accept bytes
                if isinstance(part, str):
                    part = part.encode("utf-8")
                yield part

    async def _write(self, filename=None, iterable=None, arcname=None, compress_type=None):
        """
        Put the bytes from filename (or from iterable) into the archive under
        the name `arcname`.

        Asynchronous generator yielding the local file header, the (possibly
        compressed) data and the data descriptor as returned by fp.write().

        :raises RuntimeError: if the archive is already closed, or if a size
                              exceeds ZIP64_LIMIT although the header was
                              written without ZIP64 fields
        :raises ValueError: unless exactly one of filename / iterable is given
        """
        if not self.fp:
            raise RuntimeError("Attempt to write to ZIP archive that was already closed")
        if (filename is None and iterable is None) or (filename is not None and iterable is not None):
            raise ValueError("either (exclusively) filename or iterable shall be not None")

        # "is not None" (rather than truthiness) so that an empty filename
        # fails in os.stat() instead of being treated as an iterable entry.
        if filename is not None:
            st = os.stat(filename)
            isdir = stat.S_ISDIR(st.st_mode)
            date_time = time.localtime(st.st_mtime)[0:6]
        else:
            st, isdir, date_time = None, False, time.localtime()[0:6]

        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += "/"
        zinfo = ZipInfo(arcname, date_time)
        if st is not None:
            zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
            zinfo.file_size = st.st_size
        else:
            zinfo.external_attr = 0o600 << 16  # ?rw-------
            zinfo.file_size = 0
        zinfo.compress_type = self.compression if compress_type is None else compress_type

        # ZIP flag bits, bit 3 indicates presence of data descriptor.
        # Directories carry no data and get no descriptor, so the bit must
        # stay clear for them.
        zinfo.flag_bits = 0x00 if isdir else 0x08
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == zipfile.ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            yield self.fp.write(zinfo.FileHeader(False))
            return

        cmpr = _get_compressor(zinfo.compress_type)
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = crc = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and zinfo.file_size * 1.05 > zipfile.ZIP64_LIMIT
        yield self.fp.write(zinfo.FileHeader(zip64))

        file_size = 0
        async for buf in self._read_chunks(filename, iterable):
            file_size += len(buf)
            crc = zipfile.crc32(buf, crc) & 0xFFFFFFFF
            if cmpr:
                buf = await self._run_in_executor(cmpr.compress, buf)
                compress_size += len(buf)
            yield self.fp.write(buf)

        if cmpr:
            buf = cmpr.flush()
            compress_size += len(buf)
            yield self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = crc
        zinfo.file_size = file_size

        if not zip64 and self._allowZip64:
            if file_size > zipfile.ZIP64_LIMIT:
                raise RuntimeError("File size has increased during compressing")
            if compress_size > zipfile.ZIP64_LIMIT:
                raise RuntimeError("Compressed size larger than uncompressed size")

        if zip64:
            # The local header announced ZIP64 sizes, so the descriptor must
            # use 8-byte sizes even if the final sizes ended up below the limit.
            descriptor = struct.pack(
                b"<4sLQQ", stringDataDescriptor, zinfo.CRC, zinfo.compress_size, zinfo.file_size
            )
        else:
            descriptor = zinfo.DataDescriptor()
        yield self.fp.write(descriptor)

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def _close(self):
        """
        Close the file, and for mode "w" write the ending records.

        Generator yielding the central directory entries and the
        end-of-archive records as returned by fp.write(). `fp` is set to
        None afterwards, whether or not writing succeeded.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify:  # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:  # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    extra = []
                    if zinfo.file_size > zipfile.ZIP64_LIMIT or zinfo.compress_size > zipfile.ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xFFFFFFFF
                        compress_size = 0xFFFFFFFF
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > zipfile.ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xFFFFFFFF
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    min_version = 0
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(b"<HH" + b"Q" * len(extra), 1, 8 * len(extra), *extra) + extra_data
                        min_version = zipfile.ZIP64_VERSION

                    if zinfo.compress_type == zipfile.ZIP_BZIP2:
                        min_version = max(zipfile.BZIP2_VERSION, min_version)
                    elif zinfo.compress_type == zipfile.ZIP_LZMA:
                        min_version = max(zipfile.LZMA_VERSION, min_version)

                    extract_version = max(min_version, zinfo.extract_version)
                    create_version = max(min_version, zinfo.create_version)
                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(
                            structCentralDir,
                            stringCentralDir,
                            create_version,
                            zinfo.create_system,
                            extract_version,
                            zinfo.reserved,
                            flag_bits,
                            zinfo.compress_type,
                            dostime,
                            dosdate,
                            zinfo.CRC,
                            compress_size,
                            file_size,
                            len(filename),
                            len(extra_data),
                            len(zinfo.comment),
                            0,
                            zinfo.internal_attr,
                            zinfo.external_attr,
                            header_offset,
                        )
                    except DeprecationWarning:
                        # Dump the values that could not be packed, then re-raise
                        print(
                            (
                                structCentralDir,
                                stringCentralDir,
                                create_version,
                                zinfo.create_system,
                                extract_version,
                                zinfo.reserved,
                                zinfo.flag_bits,
                                zinfo.compress_type,
                                dostime,
                                dosdate,
                                zinfo.CRC,
                                compress_size,
                                file_size,
                                len(zinfo.filename),
                                len(extra_data),
                                len(zinfo.comment),
                                0,
                                zinfo.internal_attr,
                                zinfo.external_attr,
                                header_offset,
                            ),
                            file=sys.stderr,
                        )
                        raise
                    yield self.fp.write(centdir)
                    yield self.fp.write(filename)
                    yield self.fp.write(extra_data)
                    yield self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (
                    centDirCount >= zipfile.ZIP_FILECOUNT_LIMIT
                    or centDirOffset > zipfile.ZIP64_LIMIT
                    or centDirSize > zipfile.ZIP64_LIMIT
                ):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                        structEndArchive64,
                        stringEndArchive64,
                        44,
                        45,
                        45,
                        0,
                        0,
                        centDirCount,
                        centDirCount,
                        centDirSize,
                        centDirOffset,
                    )
                    yield self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(structEndArchive64Locator, stringEndArchive64Locator, 0, pos2, 1)
                    yield self.fp.write(zip64locrec)
                    # The classic end record can only hold the clamped values
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(
                    structEndArchive,
                    stringEndArchive,
                    0,
                    0,
                    centDirCount,
                    centDirCount,
                    centDirSize,
                    centDirOffset,
                    len(self._comment),
                )
                yield self.fp.write(endrec)
                yield self.fp.write(self._comment)
                self.fp.flush()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()