os/ossrv/compressionlibs/ziplib/test/rtest/inflateprimetest/zran.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /* Portions Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
     2  * All rights reserved.
     3  */
     4 
     5 /* zran.c -- example of zlib/gzip stream indexing and random access
     6  * Copyright (C) 2005 Mark Adler
     7  * For conditions of distribution and use, see copyright notice in zlib.h
     8    Version 1.0  29 May 2005  Mark Adler */
     9 
    10 /* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
    11    for random access of a compressed file.  A file containing a zlib or gzip
    12    stream is provided on the command line.  The compressed stream is decoded in
    13    its entirety, and an index built with access points about every SPAN bytes
    14    in the uncompressed output.  The compressed file is left open, and can then
    15    be read randomly, having to decompress on the average SPAN/2 uncompressed
    16    bytes before getting to the desired block of data.
    17 
    18    An access point can be created at the start of any deflate block, by saving
    19    the starting file offset and bit of that block, and the 32K bytes of
    20    uncompressed data that precede that block.  Also the uncompressed offset of
     21    that block is saved to provide a reference for locating a desired starting
    22    point in the uncompressed stream.  build_index() works by decompressing the
    23    input zlib or gzip stream a block at a time, and at the end of each block
    24    deciding if enough uncompressed data has gone by to justify the creation of
    25    a new access point.  If so, that point is saved in a data structure that
    26    grows as needed to accommodate the points.
    27 
    28    To use the index, an offset in the uncompressed data is provided, for which
    29    the latest access point at or preceding that offset is located in the index.
    30    The input file is positioned to the specified location in the index, and if
     31    necessary the first few bits of the compressed data are read from the file.
    32    inflate is initialized with those bits and the 32K of uncompressed data, and
    33    the decompression then proceeds until the desired offset in the file is
    34    reached.  Then the decompression continues to read the desired uncompressed
    35    data from the file.
    36 
    37    Another approach would be to generate the index on demand.  In that case,
    38    requests for random access reads from the compressed data would try to use
    39    the index, but if a read far enough past the end of the index is required,
    40    then further index entries would be generated and added.
    41 
    42    There is some fair bit of overhead to starting inflation for the random
    43    access, mainly copying the 32K byte dictionary.  So if small pieces of the
    44    file are being accessed, it would make sense to implement a cache to hold
    45    some lookahead and avoid many calls to extract() for small lengths.
    46 
    47    Another way to build an index would be to use inflateCopy().  That would
    48    not be constrained to have access points at block boundaries, but requires
    49    more memory per access point, and also cannot be saved to file due to the
    50    use of pointers in the state.  The approach here allows for storage of the
    51    index in a file.
    52  */
    53 
    54 #include <e32test.h>
    55 #include <stdio.h>
    56 #include <stdlib.h>
    57 #include <string.h>
    58 #include <fcntl.h>
    59 #include <zlib.h>
    60 
    61 _LIT(KTestTitle, "inflatePrime() Test.");
    62 
    63 RTest test(_L("inflateprimetest.exe"));
    64 const int numTestFiles = 2;
    65 const char *filePath = "z:\\test\\inflateprimetest\\\0";
    66 const char *testFile[numTestFiles] = {"gzipped.gz\0", "zipped.zip\0"};
    67 
    68 /* Test macro and function */
    69 void Check(TInt aValue, TInt aExpected, TInt aLine)
    70 	{
    71     if (aValue != aExpected)
    72     	{
    73         test.Printf(_L("*** Expected error: %d, got: %d\r\n"), aExpected, aValue);
    74         test.operator()(EFalse, aLine);
    75         }
    76     }
    77 #define test2(a, b) Check(a, b, __LINE__)
    78 
    79 #define SPAN 1048576L       /* desired distance between access points */
    80 #define WINSIZE 32768U      /* sliding window size */
    81 #define CHUNK 128         /* file input buffer size */
    82 
    83 /* access point entry */
    84 struct point {
    85     off_t out;          /* corresponding offset in uncompressed data */
    86     off_t in;           /* offset in input file of first full byte */
    87     int bits;           /* number of bits (1-7) from byte at in - 1, or 0 */
    88     unsigned char window[WINSIZE];  /* preceding 32K of uncompressed data */
    89 };
    90 
    91 /* access point list */
    92 struct access {
    93     int have;           /* number of list entries filled in */
    94     int size;           /* number of list entries allocated */
    95     struct point *list; /* allocated list */
    96 };
    97 
    98 /* Deallocate an index built by build_index() */
    99 void free_index(struct access *index)
   100 {
   101     if (index != NULL) {
   102         free(index->list);
   103         free(index);
   104     }
   105 }
   106 
   107 /* Add an entry to the access point list.  If out of memory, deallocate the
   108    existing list and return NULL. */
   109 struct access *addpoint(struct access *index, int bits,
   110     off_t in, off_t out, unsigned left, unsigned char *window)
   111 {
   112     struct point *next;
   113 
   114     // if list is empty, create it (start with eight points)
   115     if (index == NULL) {
   116         index = (struct access *)malloc(sizeof(struct access));
   117         if (index == NULL) return NULL;
   118         index->list = (struct point *)malloc(sizeof(struct point) << 3);
   119         if (index->list == NULL) {
   120             free(index);
   121             return NULL;
   122         }
   123         index->size = 8;
   124         index->have = 0;
   125     }
   126 
   127     // if list is full, make it bigger
   128     else if (index->have == index->size) {
   129         index->size <<= 1;
   130         next = (struct point *)realloc(index->list, sizeof(struct point) * index->size);
   131         if (next == NULL) {
   132             free_index(index);
   133             return NULL;
   134         }
   135         index->list = next;
   136     }
   137 
   138     // fill in entry and increment how many we have
   139     next = index->list + index->have;
   140     next->bits = bits;
   141     next->in = in;
   142     next->out = out;
   143     if (left)
   144         memcpy(next->window, window + WINSIZE - left, left);
   145     if (left < WINSIZE)
   146         memcpy(next->window + left, window, WINSIZE - left);
   147     index->have++;
   148 
   149     /* return list, possibly reallocated */
   150     return index;
   151 }
   152 
   153 /* Make one entire pass through the compressed stream and build an index, with
   154    access points about every span bytes of uncompressed output -- span is
   155    chosen to balance the speed of random access against the memory requirements
   156    of the list, about 32K bytes per access point.  Note that data after the end
   157    of the first zlib or gzip stream in the file is ignored.  build_index()
   158    returns the number of access points on success (>= 1), Z_MEM_ERROR for out
   159    of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a
   160    file read error.  On success, *built points to the resulting index. */
   161 int build_index(FILE *in, off_t span, struct access **built)
   162 {
   163     int ret;
   164     off_t totin, totout;        /* our own total counters to avoid 4GB limit */
   165     off_t last;                 /* totout value of last access point */
   166     struct access *index;       /* access points being generated */
   167     z_stream strm;
   168     unsigned char input[CHUNK];
   169     unsigned char window[WINSIZE];
   170 	struct point *next = NULL;
   171 
   172     /* initialize inflate */
   173     strm.zalloc = Z_NULL;
   174     strm.zfree = Z_NULL;
   175     strm.opaque = Z_NULL;
   176     strm.avail_in = 0;
   177     strm.next_in = Z_NULL;
   178     ret = inflateInit2(&strm, 47);      /* automatic zlib or gzip decoding */
   179     if (ret != Z_OK)
   180         return ret;
   181 
   182     /* inflate the input, maintain a sliding window, and build an index -- this
   183        also validates the integrity of the compressed data using the check
   184        information at the end of the gzip or zlib stream */
   185     totin = totout = last = 0;
   186     index = NULL;               /* will be allocated by first addpoint() */
   187     strm.avail_out = 0;
   188     do {
   189         /* get some compressed data from input file */
   190         strm.avail_in = fread(input, 1, CHUNK, in);
   191         if (ferror(in)) {
   192             ret = Z_ERRNO;
   193             goto build_index_error;
   194         }
   195         if (strm.avail_in == 0) {
   196             ret = Z_DATA_ERROR;
   197             goto build_index_error;
   198         }
   199         strm.next_in = input;
   200 
   201         /* process all of that, or until end of stream */
   202         do {
   203             /* reset sliding window if necessary */
   204             if (strm.avail_out == 0) {
   205                 strm.avail_out = WINSIZE;
   206                 strm.next_out = window;
   207             }
   208 
   209             /* inflate until out of input, output, or at end of block --
   210                update the total input and output counters */
   211             totin += strm.avail_in;
   212             totout += strm.avail_out;
   213             ret = inflate(&strm, Z_BLOCK);      /* return at end of block */
   214             totin -= strm.avail_in;
   215             totout -= strm.avail_out;
   216             if (ret == Z_NEED_DICT)
   217                 ret = Z_DATA_ERROR;
   218             if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
   219                 goto build_index_error;
   220             if (ret == Z_STREAM_END)
   221                 break;
   222 
   223             /* if at end of block, consider adding an index entry (note that if
   224                data_type indicates an end-of-block, then all of the
   225                uncompressed data from that block has been delivered, and none
   226                of the compressed data after that block has been consumed,
   227                except for up to seven bits) -- the totout == 0 provides an
   228                entry point after the zlib or gzip header, and assures that the
   229                index always has at least one access point; we avoid creating an
   230                access point after the last block by checking bit 6 of data_type
   231              */
   232             if ((strm.data_type & 128) && !(strm.data_type & 64) &&
   233                 (totout == 0 || totout - last > span)) {
   234                 index = addpoint(index, strm.data_type & 7, totin,
   235                                  totout, strm.avail_out, window);
   236                 if (index == NULL) {
   237                     ret = Z_MEM_ERROR;
   238                     goto build_index_error;
   239                 }
   240                 last = totout;
   241             }
   242         } while (strm.avail_in != 0);
   243     } while (ret != Z_STREAM_END);
   244 
   245     /* clean up and return index (release unused entries in list) */
   246     (void)inflateEnd(&strm);
   247     
   248     next = (struct point *)realloc(index->list, sizeof(struct point) * index->have);
   249     if (next == NULL) {
   250         free_index(index);
   251         return Z_MEM_ERROR;
   252     }
   253     index->list = next;
   254     index->size = index->have;
   255     *built = index;
   256     return index->size;
   257 
   258     /* return error */
   259   build_index_error:
   260     (void)inflateEnd(&strm);
   261     if (index != NULL)
   262         free_index(index);
   263     return ret;
   264 }
   265 
   266 /* Use the index to read len bytes from offset into buf, return bytes read or
   267    negative for error (Z_DATA_ERROR or Z_MEM_ERROR).  If data is requested past
   268    the end of the uncompressed data, then extract() will return a value less
   269    than len, indicating how much as actually read into buf.  This function
   270    should not return a data error unless the file was modified since the index
   271    was generated.  extract() may also return Z_ERRNO if there is an error on
   272    reading or seeking the input file. */
   273 int extract(FILE *in, struct access *index, off_t offset,
   274                   unsigned char *buf, int len)
   275 {
   276     int ret, skip, value;
   277     z_stream strm;
   278     struct point *here;
   279     unsigned char input[CHUNK];
   280     //unsigned char discard[WINSIZE]; /* No longer required. See comments below. */
   281 
   282     /* proceed only if something reasonable to do */
   283     if (len < 0)
   284         return 0;
   285 
   286     /* find where in stream to start */
   287     here = index->list;
   288     ret = index->have;
   289     while (--ret && here[1].out <= offset)
   290         here++;
   291 
   292     /* initialize file and inflate state to start there */
   293     strm.zalloc = Z_NULL;
   294     strm.zfree = Z_NULL;
   295     strm.opaque = Z_NULL;
   296     strm.avail_in = 0;
   297     strm.next_in = Z_NULL;
   298     ret = inflateInit2(&strm, -15);         /* raw inflate */
   299     if (ret != Z_OK)
   300         return ret;
   301     ret = fseek(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
   302     if (ret == -1)
   303         goto extract_ret;
   304     
   305     ret = getc(in);
   306     if (ret == -1) {
   307         ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
   308         goto extract_ret;
   309     }
   310     
   311     // If bits is > 0 set the value as done in the original zran.c
   312     // else set the value to the next byte to prove that inflatePrime
   313     // is not adding anything to the start of the stream when bits is
   314     // set to 0. It is then necessary to unget the byte.
   315 	if(here->bits) {	
   316 	    value = ret >> (8 - here->bits);
   317 	}
   318 	else {
   319 		value = ret;
   320 		ungetc(ret, in);	
   321 	}	
   322 	
   323 	ret = inflatePrime(&strm, here->bits, value);
   324 	if(ret != Z_OK) {
   325 		goto extract_ret;
   326 	}
   327 	test.Printf(_L("zran: bits = %d\n"), here->bits);
   328     test.Printf(_L("zran: value = %d\n"), value); 
   329     
   330     (void)inflateSetDictionary(&strm, here->window, WINSIZE);
   331 
   332 	/* No longer required. See comment below.
   333 	 *
   334      * skip uncompressed bytes until offset reached, then satisfy request
   335     offset -= here->out;
   336      */
   337     strm.avail_in = 0;
   338     skip = 1;                               /* while skipping to offset */
   339     do {
   340         /* define where to put uncompressed data, and how much */
   341         if (skip) {          /* at offset now */
   342             strm.avail_out = len;
   343             strm.next_out = buf;
   344             skip = 0;                       /* only do this once */
   345         }
   346         
   347         /* This code is not required in this test as it is used
   348          * to discard decompressed data between the current
   349          * access point and the offset(place in the file from
   350          * which we wish to decompress data).
   351          * 
   352         if (offset > WINSIZE) {             // skip WINSIZE bytes
   353             strm.avail_out = WINSIZE;
   354             strm.next_out = discard;
   355             offset -= WINSIZE;
   356         }
   357         else if (offset != 0) {             // last skip
   358             strm.avail_out = (unsigned)offset;
   359             strm.next_out = discard;
   360             offset = 0;
   361         }
   362 		*/
   363 		
   364         /* uncompress until avail_out filled, or end of stream */
   365         do {
   366             if (strm.avail_in == 0) {
   367                 strm.avail_in = fread(input, 1, CHUNK, in);
   368                 if (ferror(in)) {
   369                     ret = Z_ERRNO;
   370                     goto extract_ret;
   371                 }
   372                 if (strm.avail_in == 0) {
   373                     ret = Z_DATA_ERROR;
   374                     goto extract_ret;
   375                 }
   376                 strm.next_in = input;
   377             }
   378             ret = inflate(&strm, Z_NO_FLUSH);       /* normal inflate */
   379             if (ret == Z_NEED_DICT)
   380                 ret = Z_DATA_ERROR;
   381             if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
   382                 goto extract_ret;
   383             if (ret == Z_STREAM_END)
   384                 break;
   385         } while (strm.avail_out != 0);
   386 
   387         /* if reach end of stream, then don't keep trying to get more */
   388         if (ret == Z_STREAM_END)
   389             break;
   390 
   391         /* do until offset reached and requested data read, or stream ends */
   392     } while (skip);
   393 
   394     /* compute number of uncompressed bytes read after offset */
   395     ret = skip ? 0 : len - strm.avail_out;
   396 
   397     /* clean up and return bytes read or error */
   398   extract_ret:
   399     (void)inflateEnd(&strm);
   400     return ret;
   401 }
   402 
   403 /* Demonstrate the use of build_index() and extract() by processing the file
   404    provided and then extracting CHUNK bytes at each access point. */
   405 int TestInflatePrime(char *file)
   406 	{
   407     int len;
   408     FILE *in;
   409     struct access *index;
   410     unsigned char buf[CHUNK];
   411 
   412     in = fopen(file, "rb");
   413     if (in == NULL) 
   414     	{
   415         return KErrPathNotFound;
   416     	}
   417 
   418     // build index
   419     len = build_index(in, SPAN, &index);
   420     if (len < 0) 
   421     	{
   422         fclose(in);
   423         test.Printf(_L("error: %d\n"), len);
   424         return KErrGeneral;
   425     	}
   426     test.Printf(_L("zran: built index with %d access points\n"), len);
   427 
   428 	// Extract some data at the start of each access point. This is done
   429 	// so that we can try extracting some data that does not necessarily 
   430 	// start at a byte boundary ie it might start mid byte.
   431     for(int i = 0; i < index->have; i++)
   432 	    {
   433 	    len = extract(in, index, index->list[i].out, buf, CHUNK);
   434 	    if (len < 0)
   435 	    	{
   436 	    	test.Printf(_L("zran: extraction failed: "));
   437 
   438 	    	if(len == Z_MEM_ERROR)
   439                 {
   440                 test.Printf(_L("out of memory error\n"));
   441                 }
   442             else
   443                 {
   444                 test.Printf(_L("input corrupted error\n"));
   445                 }
   446             }
   447 	    else 
   448 	    	{
   449 	        test.Printf(_L("zran: extracted %d bytes at %Lu\n"), len, index->list[i].out);
   450 	    	}	
   451 	    }    
   452 
   453     // clean up and exit
   454     free_index(index);
   455     fclose(in);
   456     
   457     return KErrNone;
   458 	}
   459 
   460 /**
   461 @SYMTestCaseID       	SYSLIB-EZLIB2-UT-4273
   462 @SYMTestCaseDesc     	To check that data can be decompressed at various points in a 
   463                         compressed file (i.e. decompression may start part of the way 
   464                         through a byte) via the use of inflatePrime().
   465 @SYMTestPriority     	Low
   466 @SYMTestActions      	1.	Open a compressed file for reading.
   467                         2.	Create an inflate stream and initialise it using inflateInit2(), 
   468                             setting windowBits to 47 (automatic gzip/zip header detection).
   469                         3.	Inflate the data in the file using inflate(). During inflation 
   470                             create access points using structure Point which maps points 
   471                             in the uncompressed data with points in the compressed data. 
   472                             The first access point should be at the start of the data 
   473                             i.e. after the header.
   474                             
   475                             Structure  Point consist of : 
   476                             •	UPoint(in bytes) – this is the point in the uncompressed data 
   477                             •	CPoint(in bytes) – this is the point in the compressed data
   478                             •	bits(in bits) – this is the point in the compressed data
   479                         4.	Cleanup the inflate stream using inflateEnd().
   480                         5.	For each access point do the following:
   481                             a.	Initialise the inflate stream using inflateInit2(), 
   482                                 setting windowBits to -15.
   483                             b.	Move the file pointer to CPoint - 1 in the input file.
   484                             c.	Calculate the value which will be passed to inflatePrime(). 
   485                                 The algorithm used to calculate value can be seen in the 
   486                                 attached diagram (in the test spec).
   487                             d.	Call inflatePrime() with the bits and value.
   488                             e.	Inflate a small section of in the input file using inflate().
   489                             f.	Cleanup the inflate stream using inflateEnd().
   490                         6.	Close the compressed file and cleanup any allocated memory.
   491                         
   492                         Note: This test should be completed using a zlib file and a gzip 
   493                               file. These files should be 500 – 1000KB in size.
   494 @SYMTestExpectedResults inflatePrime() should return Z_OK and the data should be 
   495                         decompressed with no errors.
   496 @SYMDEF                 REQ7362
   497 */
   498 void RunTestL()
   499 	{
   500 	test.Next(_L(" @SYMTestCaseID:SYSLIB-EZLIB2-UT-4273 "));
   501 	int err;	
   502 	char file[KMaxFileName];
   503 	
   504 	for(int i = 0; i < numTestFiles; i++)
   505 		{
   506 		TBuf<40> testName(_L("inflatePrime test using file "));
   507 		testName.AppendNum(i);
   508 		test.Next(testName);
   509 		
   510 		strcpy(file, filePath);
   511 		strcat(file, testFile[i]);
   512 			
   513 		err = TestInflatePrime(file);
   514 			
   515 		if(err == KErrPathNotFound)
   516 			{
   517 			test.Printf(_L("zran: could not open file number %d for reading\n"), i);
   518 			User::Leave(err);
   519 			}
   520 		else if(err != KErrNone)
   521 			{
   522 			User::Leave(err);
   523 			}
   524 			
   525 		test.Printf(_L("\n"));		
   526 		}
   527 	}
   528 
   529 TInt E32Main()
   530 	{
   531 	__UHEAP_MARK;
   532 
   533 	test.Printf(_L("\n"));
   534 	test.Title();
   535 	test.Start(KTestTitle);
   536 
   537 	CTrapCleanup* cleanup = CTrapCleanup::New();
   538 
   539 	TRAPD(err, RunTestL());
   540 	test2(err, KErrNone);
   541 	
   542 	test.End();
   543 	test.Close();
   544 	delete cleanup;
   545 
   546 	__UHEAP_MARKEND;
   547 	return KErrNone;
   548 	}