Basic gzip/gunzip in memory buffer examples using zlib code.

Dependencies:   mbed-rtos mbed

There are small changes needed to the zconf.h file in the zlib distribution (I used 1.2.7). The zlib license applies to the zlib code - I have only imported a subset of the source.

The MBED has limited memory, so we need the following (near the top of zconf.h) to restrict memory allocation sizes:

#define    MAX_MEM_LEVEL    3
#define    MAX_WBITS        10

Because MAX_MEM_LEVEL and MAX_WBITS are so much lower than the default, there is a danger that the mbed cannot gunzip data compressed by a 'normal' zlib build. My use-case is to gzip on the mbed more than gunzip on the mbed so I have not given much time to this issue.

I also included this (also near the top of zconf.h) to prefix defines with Z_

#define    Z_PREFIX

In zconf.h, in the zlib distribution, the includes for <fcntl.h> and <sys/types.h> need commenting out when using the online compiler. No need when using GCC4MBED.

I also looked at miniz. I chose zlib because I needed the gzip headers and miniz does not implement them.

The sample main.cpp reads source data, compresses it, decompresses it, and finally compares the input data with the output data to confirm they are the same.

    unsigned char input_data[2048];
    unsigned long input_data_length = 0;
    FILE *ifp = fopen("/local/src.txt", "r");
    if (ifp) {
        int br = fread(input_data, 1, sizeof(input_data), ifp);
        fclose(ifp);
        input_data_length = br;
    }
    printf("%s:%d: input_data_length:%lu%s", __FILE__, __LINE__, input_data_length, newline);
 
 
    unsigned char gzip_data[2048];
    unsigned long gzip_data_length = 0;
    if (input_data_length > 0) {
        gzip_data_length = sizeof(gzip_data);
        int rv = gzip(gzip_data, &gzip_data_length, input_data, input_data_length);
        if (Z_OK == rv) {
            FILE *ofp = fopen("/local/dst.gz", "w");
            if (ofp) {
                int bw = fwrite(gzip_data, 1, gzip_data_length, ofp);
                fclose(ofp);
            }
        } else {
            printf("%s:%d: %d%s", __FILE__, __LINE__, rv, newline);
        }
    }
    printf("%s:%d: gzip_data_length:%lu%s", __FILE__, __LINE__, gzip_data_length, newline);
 
 
    unsigned char output_data[2048];
    unsigned long output_data_length = 0;
    if (gzip_data_length > 0) {
        output_data_length = sizeof(output_data);
        int rv = gunzip(output_data, &output_data_length, gzip_data, gzip_data_length);
        if (Z_OK != rv) {
            printf("%s:%d: %d%s", __FILE__, __LINE__, rv, newline);
        }
    }
    printf("%s:%d: output_data_length:%lu%s", __FILE__, __LINE__, output_data_length, newline);
 
 
    if (input_data_length > 0 and input_data_length > 0) {
        bool input_matches_output = false;
        if (input_data_length == output_data_length) {
            input_matches_output = true;
            for ( size_t i = 0 ; input_matches_output && i < input_data_length ; i++ ) {
                if (input_data[i] != output_data[i]) {
                    input_matches_output = false;
                }
            }
        }
        printf("%s:%d: input (%lu bytes) %s output (%lu bytes)%s", __FILE__, __LINE__, input_data_length, input_matches_output?"matches":"does not match", output_data_length, newline);
    } else {
        printf("%s:%d: input and/or output length is 0%s", __FILE__, __LINE__, newline);
    }

zlib/inflate.h

Committer:
jonathonfletcher
Date:
2012-10-21
Revision:
0:54f5be781526

File content as of revision 0:54f5be781526:

/* inflate.h -- internal inflate state definition
 * Copyright (C) 1995-2009 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/* WARNING: this file should *not* be used by applications. It is
   part of the implementation of the compression library and is
   subject to change. Applications should only use zlib.h.
 */

/* define NO_GZIP when compiling if you want to disable gzip header and
   trailer decoding by inflate().  NO_GZIP would be used to avoid linking in
   the crc code when it is not needed.  For shared libraries, gzip decoding
   should be left enabled. */
#ifndef NO_GZIP
#  define GUNZIP
#endif

/* Possible inflate modes between inflate() calls */
typedef enum {
    HEAD,       /* i: waiting for magic header */
    FLAGS,      /* i: waiting for method and flags (gzip) */
    TIME,       /* i: waiting for modification time (gzip) */
    OS,         /* i: waiting for extra flags and operating system (gzip) */
    EXLEN,      /* i: waiting for extra length (gzip) */
    EXTRA,      /* i: waiting for extra bytes (gzip) */
    NAME,       /* i: waiting for end of file name (gzip) */
    COMMENT,    /* i: waiting for end of comment (gzip) */
    HCRC,       /* i: waiting for header crc (gzip) */
    DICTID,     /* i: waiting for dictionary check value */
    DICT,       /* waiting for inflateSetDictionary() call */
        TYPE,       /* i: waiting for type bits, including last-flag bit */
        TYPEDO,     /* i: same, but skip check to exit inflate on new block */
        STORED,     /* i: waiting for stored size (length and complement) */
        COPY_,      /* i/o: same as COPY below, but only first time in */
        COPY,       /* i/o: waiting for input or output to copy stored block */
        TABLE,      /* i: waiting for dynamic block table lengths */
        LENLENS,    /* i: waiting for code length code lengths */
        CODELENS,   /* i: waiting for length/lit and distance code lengths */
            LEN_,       /* i: same as LEN below, but only first time in */
            LEN,        /* i: waiting for length/lit/eob code */
            LENEXT,     /* i: waiting for length extra bits */
            DIST,       /* i: waiting for distance code */
            DISTEXT,    /* i: waiting for distance extra bits */
            MATCH,      /* o: waiting for output space to copy string */
            LIT,        /* o: waiting for output space to write literal */
    CHECK,      /* i: waiting for 32-bit check value */
    LENGTH,     /* i: waiting for 32-bit length (gzip) */
    DONE,       /* finished check, done -- remain here until reset */
    BAD,        /* got a data error -- remain here until reset */
    MEM,        /* got an inflate() memory error -- remain here until reset */
    SYNC        /* looking for synchronization bytes to restart inflate() */
} inflate_mode;

/*
    State transitions between above modes -

    (most modes can go to BAD or MEM on error -- not shown for clarity)

    Process header:
        HEAD -> (gzip) or (zlib) or (raw)
        (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT ->
                  HCRC -> TYPE
        (zlib) -> DICTID or TYPE
        DICTID -> DICT -> TYPE
        (raw) -> TYPEDO
    Read deflate blocks:
            TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK
            STORED -> COPY_ -> COPY -> TYPE
            TABLE -> LENLENS -> CODELENS -> LEN_
            LEN_ -> LEN
    Read deflate codes in fixed or dynamic block:
                LEN -> LENEXT or LIT or TYPE
                LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
                LIT -> LEN
    Process trailer:
        CHECK -> LENGTH -> DONE
 */

/* state maintained between inflate() calls.  Approximately 10K bytes. */
struct inflate_state {
    inflate_mode mode;          /* current inflate mode */
    int last;                   /* true if processing last block */
    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip */
    int havedict;               /* true if dictionary provided */
    int flags;                  /* gzip header method and flags (0 if zlib) */
    unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
    unsigned long check;        /* protected copy of check value */
    unsigned long total;        /* protected copy of output count */
    gz_headerp head;            /* where to save gzip header information */
        /* sliding window */
    unsigned wbits;             /* log base 2 of requested window size */
    unsigned wsize;             /* window size or zero if not using window */
    unsigned whave;             /* valid bytes in the window */
    unsigned wnext;             /* window write index */
    unsigned char FAR *window;  /* allocated sliding window, if needed */
        /* bit accumulator */
    unsigned long hold;         /* input bit accumulator */
    unsigned bits;              /* number of bits in "in" */
        /* for string and stored block copying */
    unsigned length;            /* literal or length of data to copy */
    unsigned offset;            /* distance back to copy string from */
        /* for table and code decoding */
    unsigned extra;             /* extra bits needed */
        /* fixed and dynamic code tables */
    code const FAR *lencode;    /* starting table for length/literal codes */
    code const FAR *distcode;   /* starting table for distance codes */
    unsigned lenbits;           /* index bits for lencode */
    unsigned distbits;          /* index bits for distcode */
        /* dynamic table building */
    unsigned ncode;             /* number of code length code lengths */
    unsigned nlen;              /* number of length code lengths */
    unsigned ndist;             /* number of distance code lengths */
    unsigned have;              /* number of code lengths in lens[] */
    code FAR *next;             /* next available space in codes[] */
    unsigned short lens[320];   /* temporary storage for code lengths */
    unsigned short work[288];   /* work area for code table building */
    code codes[ENOUGH];         /* space for code tables */
    int sane;                   /* if false, allow invalid distance too far */
    int back;                   /* bits back of last unprocessed length/lit */
    unsigned was;               /* initial length of match */
};