在python中解压嵌套的压缩文件

Question 1

我正在寻找一种在python中解压缩嵌套的zip文件的方法。例如，考虑以下结构（为方便起见，假设的名字）。

Folder

ZipfileA.zip

ZipfileA1.zip

ZipfileA2.zip

ZipfileB.zip

ZipfileB1.zip

ZipfileB2.zip

...等等。我正试图访问第二层压缩文件中的文本文件。我当然不想把所有的东西都提取出来，因为数量太多，会使电脑崩溃（第一层有几百个压缩包，第二层有近一万个压缩包（每个压缩包））。

我一直在尝试使用 "zipfile" 模块——我能够打开第一层的 zip 文件。例如：

# Open the first-level archive directly from disk.
zipfile_obj = zipfile.ZipFile("/Folder/ZipfileA.zip")
# Open the second-level archive as a member of the first; this yields a
# ZipExtFile stream rather than a ZipFile.
next_layer_zip = zipfile_obj.open("ZipfileA1.zip")
然而，这将返回一个 "ZipExtFile" 实例（而不是文件或 ZipFile 实例）——我无法在这种类型的对象上继续打开下一层。也就是说，我不能这样做：
data = next_layer_zip.open(data.txt)
不过，我可以用 read() 读取这个内层压缩文件：
next_layer_zip.read()
但这完全没用！（即只能读到压缩后的数据，也就是一堆乱码（gobbledygook）。）
有谁能告诉我该怎么做（不使用 ZipFile.extract）？
我看到了这个：http://pypi.python.org/pypi/zip_open/ ——这看起来正是我想要的，但它似乎对我不起作用。（使用该模块处理我的文件时，我一直得到 "[Errno 2] No such file or directory:"。）
如果有任何想法，我将不胜感激！谢谢！

Question 2


          
           
            
             ZipFile需要一个类似文件的对象，所以你可以使用StringIO将你从嵌套的zip中读取的数据变成这样一个对象。需要注意的是，你将会把完整的（仍然是压缩的）内部压缩文件加载到内存中。
            
            import io
            import zipfile

            # ZipFile needs a seekable file-like object, so the inner archive is
            # buffered in memory first. Note that the whole (still compressed)
            # inner archive is loaded into memory.
            with zipfile.ZipFile('foo.zip') as z:
                with z.open('nested.zip') as z2:
                    # io.BytesIO works on Python 2.6+ and Python 3; on Python 2,
                    # cStringIO.StringIO played the same role.
                    z2_filedata = io.BytesIO(z2.read())
                with zipfile.ZipFile(z2_filedata) as nested_zip:
                    # print(...) with a single argument is valid on Python 2 and 3.
                    print(nested_zip.open('data.txt').read())

Question 3


          
           
            
             
              不幸的是，解压 zip 文件需要对归档进行随机访问，而 ZipFile 的方法（更不用说 DEFLATE 算法本身）只提供流。因此，如果不先把内层压缩文件解压出来，就无法读取嵌套压缩文件中的内容。

Question 4


          
           
            
             
              这是我想出的一个函数。
             
             def extract_nested_zipfile(path, parent_zip=None):
                 """Return the ZipFile addressed by ``path``, looking inside other zips.

                 ``path`` may contain intermediary zip files.  For example,
                 ``/root/gparent.zip/parent.zip/child.zip`` returns a ZipFile that
                 represents ``child.zip``.

                 Args:
                     path: Path to the wanted archive.  Every component ending in
                         ``.zip`` followed by ``os.sep`` is treated as an archive to
                         descend into.
                     parent_zip: An already opened ZipFile that ``path`` is relative
                         to, or None when ``path`` starts on disk.  It is never
                         closed by this function.

                 Returns:
                     A ``zipfile.ZipFile``.  Archives found inside another archive
                     are fully loaded into memory (still compressed).
                 """
                 # Local import so the snippet stays self-contained; BytesIO is the
                 # binary in-memory buffer on both Python 2.6+ and Python 3.
                 import io

                 def extract_inner_zipfile(parent_zip, child_zip_path):
                     """Return the ZipFile stored as ``child_zip_path`` in ``parent_zip``."""
                     # ZipFile needs a seekable file object, so buffer the member's
                     # bytes in memory and close the member stream right away.
                     with parent_zip.open(child_zip_path) as member:
                         return zipfile.ZipFile(io.BytesIO(member.read()))

                 if ('.zip' + os.sep) in path:
                     (parent_zip_path, child_zip_path) = os.path.relpath(path).split(
                         '.zip' + os.sep, 1)
                     parent_zip_path += '.zip'
                     if parent_zip is None:
                         # This is the top-level, so read from disk
                         opened_parent = zipfile.ZipFile(parent_zip_path)
                     else:
                         # We're already in a zip, so pull it out and recurse
                         opened_parent = extract_inner_zipfile(
                             parent_zip, parent_zip_path)
                     try:
                         return extract_nested_zipfile(child_zip_path, opened_parent)
                     finally:
                         # The returned archive lives in its own memory buffer, so
                         # the intermediate archive opened here can be released.
                         opened_parent.close()

                 if parent_zip is None:
                     # If there is no nesting, it's easy!
                     return zipfile.ZipFile(path)
                 return extract_inner_zipfile(parent_zip, path)
Here's how I tested it:
echo hello world > hi.txt
zip wrap1.zip hi.txt
zip wrap2.zip wrap1.zip
zip wrap3.zip wrap2.zip
print extract_nested_zipfile('/Users/mattfaus/dev/dev-git/wrap1.zip').open('hi.txt').read()
print extract_nested_zipfile('/Users/mattfaus/dev/dev-git/wrap2.zip/wrap1.zip').open('hi.txt').read()
print extract_nested_zipfile('/Users/mattfaus/dev/dev-git/wrap3.zip/wrap2.zip/wrap1.zip').open('hi.txt').read()

Question 5


          
           
            
             
              
               
                对于那些想要一个能提取嵌套压缩文件（任意嵌套层级）并清理原始压缩文件的函数的人，可以使用下面这个函数。
               
               import zipfile, re, os
def extract_nested_zip(zippedFile, toFolder):
    """Unzip a zip file and every zip file nested inside it.

    Each archive is deleted once it has been extracted.  Nested archives are
    extracted into the directory that contains them.

    Args:
        zippedFile: Path of the outer zip file; it is removed after extraction.
        toFolder: Directory that receives the outer archive's contents.  Any
            ``.zip`` file found beneath it is extracted and removed as well.
    """

    def _extract_and_remove(archive_path, destination):
        """Extract one archive into ``destination`` and delete the archive."""
        with zipfile.ZipFile(archive_path, 'r') as zfile:
            zfile.extractall(path=destination)
        os.remove(archive_path)

    _extract_and_remove(zippedFile, toFolder)

    # Work in passes instead of recursing from inside os.walk: a recursive
    # call re-walks the same folder and deletes sibling archives that the
    # outer loop still has queued, which then fail with "no such file".
    while True:
        nested = [
            (os.path.join(root, filename), root)
            for root, _dirs, files in os.walk(toFolder)
            for filename in files
            if filename.endswith('.zip')
        ]
        if not nested:
            break
        for archive_path, destination in nested:
            _extract_and_remove(archive_path, destination)

Question 6


          
           
            
             
              
               
                
                 I use python 3.7.3
                
                import zipfile
import io
with zipfile.ZipFile('all.zip') as outer_archive:
    # ZipFile needs a seekable file object, so hold the (still compressed)
    # inner archive in an in-memory bytes buffer.
    inner_buffer = io.BytesIO(outer_archive.read('nested.zip'))
    with zipfile.ZipFile(inner_buffer) as inner_archive:
        readme_bytes = inner_archive.open('readme.md').read()
        print(readme_bytes)

Question 7


          
           
            
             
              
               
                
                 
                  这对我来说很有效。只要把这个脚本和嵌套的压缩文件放在同一个目录下。它也会计算嵌套的压缩包中的文件总数
                 
                 import os
from zipfile import ZipFile
def unzip(path, total_count):
    """Extract every zip beneath ``path`` in place and count the other files.

    Each ``.zip`` file is extracted into a sibling directory named after the
    archive without its extension, deleted, and the new directory is then
    processed recursively.

    Args:
        path: Directory to walk.
        total_count: Running number of non-zip files counted so far.

    Returns:
        ``total_count`` plus the number of non-zip files found.
    """
    for folder, _subfolders, names in os.walk(path):
        for name in names:
            full_path = os.path.join(folder, name)
            if full_path.endswith('.zip'):
                # Strip the 4-character ".zip" suffix to get the target folder.
                target_dir = full_path[:-4]
                if not os.path.exists(target_dir):
                    os.makedirs(target_dir)
                with ZipFile(full_path) as archive:
                    archive.extractall(target_dir)
                os.remove(full_path)
                total_count = unzip(target_dir, total_count)
                continue
            total_count += 1
    return total_count
# Start from the current directory with a zero count, then print the total
# that unzip() returns.
total_count = unzip ('.', 0)
print(total_count)

Question 8


          
           
            
             
              
               
                
                 
                  
                   我处理这种问题的方法是这样的，包括自我分配的对象。
                  
                  import os
import re 
import zipfile
import pandas as pd
# import numpy as np
path = r'G:\Important\Data\EKATTE'
# DESCRIBE
archives = os.listdir(path)
archives = [ar for ar in archives if ar.endswith(".zip")]
contents = pd.DataFrame({'elec_date':[],'files':[]})
for a in archives:
    archive = zipfile.ZipFile( path+'\\'+a )
    filelist = archive.namelist()
    # archive.infolist()
    for i in archive.namelist():
        if re.match('.*zip', i):
            sub_arch = zipfile.ZipFile(archive.open(i))
            sub_names = [x for x in sub_arch.namelist()]