Over a million developers have joined DZone.

Gzip Pipe

DZone's Guide to

Gzip Pipe

Free Resource

#! /usr/bin/python

from gzip import GzipFile
from StringIO import StringIO

class GZipPipe(StringIO):
    """Compression pipe suitable for asynchronous (chunk-by-chunk) processing.

    The pipe wraps a readable source and exposes a ``read()`` method that
    returns the gzip-compressed form of the source data.

    Only one buffer of data is read/compressed at a time.  The whole source
    is never loaded at once, which keeps memory consumption low for big
    files.

    Internally a ``GzipFile`` is created with this object as its output file
    object: every time source data is fed to the ``GzipFile`` it calls our
    ``write()`` method, which accumulates the compressed bytes in
    ``self.buffer`` until ``read()`` hands them out.
    """

    # Number of bytes requested from the source on each source.read() call.
    CHUNCK_SIZE = 1024

    def __init__(self, source=None, name="data"):
        """Create the pipe.

        @param source   Source data to compress (a stream/file/buffer -
                        anything with a read() method returning byte strings)
        @param name     Name of the data stored in the gzip header
        """
        # Source file
        self.source = source

        # EOF reached for source ?
        self.source_eof = False

        # Compressed data not yet handed out by read().  It must exist before
        # the GzipFile is created, because the GzipFile writes the gzip
        # header to us from inside its constructor.
        self.buffer = b""

        # Inherited constructor (explicit call: the Python 2 StringIO class
        # is old-style, so super() cannot be used there).
        StringIO.__init__(self)

        # Init GzipFile that writes to us (via our write() method)
        self.zipfile = GzipFile(name, 'wb', 9, self)

    def write(self, data):
        """Append compressed data to the internal buffer.

        The write method shouldn't be called from outside.  The GzipFile was
        created with this object as its output buffer and fills it whenever
        source data is written to the GzipFile (calling read() on this object
        does that for you).

        @param data Compressed bytes produced by the GzipFile
        """
        self.buffer += data

    def read(self, size=-1):
        """Calling read() on a zip pipe will suck data from the source stream.

        @param  size Maximum size to read - Read whole compressed file if
                     negative, None or not specified.
        @return Compressed data (an empty byte string once everything has
                been returned, or when size is 0)
        """
        # Any negative size (or None) means "everything".
        if size is None or size < 0:
            size = -1

        # Feed the zipped buffer by writing source data to the zip stream
        while (size == -1 or len(self.buffer) < size) and not self.source_eof:
            # No source given in input
            if self.source is None:
                break

            # Get a chunk of source data
            chunk = self.source.read(GZipPipe.CHUNCK_SIZE)

            if chunk:
                # Feed the zip file (that fills the compressed buffer)
                self.zipfile.write(chunk)
            else:
                # An empty read marks the end of the source.  A merely short
                # read does not: streams may return fewer bytes than asked.
                self.source_eof = True
                # Closing flushes the compressor and emits the gzip trailer
                # (CRC + size) into our buffer; without it the output is not
                # a valid gzip stream.  It does not close this object.
                self.zipfile.close()

        # We have enough data in the buffer (or source is EOF): hand it out
        if size == 0:
            result = b""
        elif size >= 1:
            result = self.buffer[0:size]
            self.buffer = self.buffer[size:]
        else:  # size < 0 : All requested
            result = self.buffer
            self.buffer = b""

        return result


Opinions expressed by DZone contributors are their own.

{{ parent.title || parent.header.title}}

{{ parent.tldr }}

{{ parent.urlSource.name }}