Monday, March 3, 2014

Compressing using bz2 library in C++



For Compiling:
$ g++ code.cpp -lbz2
 

You need two open FILE* handles (opened in binary mode):
one to read the input from and a second to write the compressed data to.

#include <bzlib.h>

// Compress everything readable from fpin into fpout as one bzip2 stream.
// Expects fpin and fpout to be open FILE* handles and buf to be a writable
// buffer of nBuf bytes, all set up before this snippet.
int bzerror = BZ_OK;
// Arguments: block size 9 (x100k), verbosity 0 (silent), work factor 30.
BZFILE *bfp = BZ2_bzWriteOpen(&bzerror, fpout, 9, 0, 30);
if (bzerror != BZ_OK)
{
    BZ2_bzWriteClose(&bzerror, bfp, 0, NULL, NULL);
    fclose(fpin);
    fclose(fpout);
    return 1;
}

// Read raw chunks with fread instead of fgets/strlen: the input is arbitrary
// bytes, and a string-based length would drop everything after a NUL byte.
bool failed = false;
size_t nRead = 0;
while ((nRead = fread(buf, 1, nBuf, fpin)) > 0)
{
    BZ2_bzWrite(&bzerror, bfp, buf, static_cast<int>(nRead));
    if (bzerror == BZ_IO_ERROR)
    {
        std::cout << "bz-io-error detected\n";
        failed = true;
        break;
    }
    if (bzerror != BZ_OK)
    {
        // Any other status also means the chunk was not compressed.
        std::cout << "bz write failed, error code " << bzerror << "\n";
        failed = true;
        break;
    }
}
if (!failed && ferror(fpin))
{
    // fread returning 0 can mean a read error rather than end of file.
    std::cout << "read error on input file\n";
    failed = true;
}

// After a failure, abandon the stream (third argument 1) so the library does
// not try to flush more data; otherwise finish the stream normally.
BZ2_bzWriteClose(&bzerror, bfp, failed ? 1 : 0, NULL, NULL);
// NOTE(review): fpin and fpout are still open here; the surrounding code is
// presumably responsible for closing them - confirm.

// ---- end ----


BZFILE *BZ2_bzWriteOpen( int *bzerror, 
                                                   FILE *f, 
                                                   int blockSize100k
                                                   int verbosity
                                                   int workFactor );

- blockSize100k specifies the block size to be used for compression, in units of 100k bytes (a value between 1 and 9 inclusive)
- verbosity should be set to a number between 0 and 4 inclusive. 
  0 is silent, and greater numbers give increasingly verbose monitoring/debugging output 
- workFactor controls how the compression phase behaves when presented with worst-case, highly repetitive input data.
  If compression runs into difficulties caused by repetitive data, the library switches from the standard sorting algorithm to a fallback algorithm.
  The fallback is slower than the standard algorithm by perhaps a factor of three, but always behaves reasonably, no matter how bad the input.


=====================================

Reading a compressed file, i.e. extracting the data

#include <bzlib.h>

// Decompress the bzip2 stream behind bfp and copy the output to stdout.
// Expects bfp to be a read handle already opened with BZ2_bzReadOpen on fpin,
// bzerror to be declared, and buf to be a writable buffer of at least nBuf
// bytes, all set up before this snippet.
int nBuf = 512;

bzerror = BZ_OK;
while (bzerror == BZ_OK)
{
    // BZ2_bzRead returns the number of bytes it stored in buf. The chunk is
    // not NUL-terminated and may contain NUL bytes, so it must be written
    // out by length rather than streamed as a C string.
    int nRead = BZ2_bzRead(&bzerror, bfp, buf, nBuf);
    if (bzerror == BZ_IO_ERROR)
    {
        std::cout << "bz-io-error detected\n";
        break;
    }
    if (bzerror != BZ_OK && bzerror != BZ_STREAM_END)
    {
        // Any other status (e.g. corrupt input) means buf holds no valid data.
        std::cout << "bz read failed, error code " << bzerror << "\n";
        break;
    }
    // On BZ_STREAM_END the final chunk is still valid, so emit it before the
    // loop condition ends the loop.
    std::cout.write(reinterpret_cast<const char*>(buf), nRead);
}
BZ2_bzReadClose(&bzerror, bfp);
fclose(fpin);

// ---- end ----




BZFILE *BZ2_bzReadOpen(int *bzerror, 
                       FILE *f, 
                       int verbosity, 
                       int small
                       void *unused
                       int nUnused );

- If small is 1, the library will try to decompress using less memory, at the expense of speed.
- Before reading from f, the library first decompresses the nUnused bytes starting at unused; pass NULL and 0 if this is not needed.