/*
$Id: compress.c,v 1.70 2005/05/12 15:41:04 karman Exp $
**
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**

    This file is part of Swish-e.

    Swish-e is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Swish-e is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Swish-e; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    See the COPYING file that accompanies the Swish-e distribution for details
    of the GNU GPL and the special exception available for linking against
    the Swish-e library.

** Mon May 9 15:51:39 CDT 2005
** added GPL
** karman: how much of this file is still original??

** 2001-02-12 rasc errormsg "print" changed...
**
*/

#include "swish.h"
#include "swstring.h"
#include "compress.h"
#include "mem.h"
#include "error.h"
#include "merge.h"
#include "search.h"
#include "docprop.h"
#include "index.h"
#include "hash.h"
#include "ramdisk.h"
#include "swish_qsort.h"
#include "file.h"

#ifdef HAVE_ZLIB
#include <zlib.h>
#define Z_BUFSIZE 16384
#endif


/* Surfing the web I found this:
** it is a very simple macro that can be used in *PACKLONG* routines to
** detect if we need to spend some cycles for [un]packing the number in a
** portable format
*/
#ifndef LITTLE_ENDIAN
static const int swish_endian_test_value = 1;
#define LITTLE_ENDIAN (*(const unsigned char *)&swish_endian_test_value)
#endif


/* 2001-05 jmruiz */
/* Routines for compressing numbers - Macros converted to routines */
/*
** A compressed number is stored 7 bits per byte, most significant group
** first.  Every byte except the last one has bit 0x80 set.
**
** All the compress routines treat the number as its unsigned bit pattern.
** Non-negative values are encoded exactly as before; a negative value now
** yields a bounded encoding instead of looping forever on a sign-preserving
** right shift.
*/

/* 2002-11 jmruiz */
/* Get required size in bytes for a given compressed number */
int sizeofcompint(int number)
{
    unsigned int rest = (unsigned int) number;
    int     size = 0;

    do
    {
        size++;
    }
    while ((rest >>= 7));

    return size;
}

/* Compress a number and writes it to a file */
/* f_putc is called once per output byte; progerrno() is called when it returns EOF */
void    compress1(int num, FILE * fp, int (*f_putc) (int, FILE *))
{
    unsigned int  rest = (unsigned int) num;
    unsigned char groups[MAXINTCOMPSIZE];
    int           count = 0;

    /* Trivial case: 0 */
    if (!rest)
    {
        if (f_putc(0, fp) == EOF)
            progerrno("compress1 failed to write null: ");
        return;
    }

    /* Any other case ... */
    /* Split into 7-bit groups, least significant first.  The count check */
    /* keeps the scratch array from being overrun. */
    /* NOTE(review): assumes MAXINTCOMPSIZE groups cover an int - confirm in swish.h */
    while (rest && count < MAXINTCOMPSIZE)
    {
        groups[count++] = (unsigned char) (rest & 127);
        rest >>= 7;
    }

    /* Emit most significant group first; all but the last carry bit 0x80 */
    while (--count >= 0)
    {
        if (f_putc(groups[count] | (count ? 128 : 0), fp) == EOF)
            progerrno("compress1 failed to write: ");
    }
}

/* Compress a number and writes it to a buffer */
/* buffer must be previously allocated */
/* The number is written BACKWARDS: its last byte goes to *buffer and the */
/* preceding bytes go to lower addresses */
/* returns the decreased buffer pointer after storing the compressed number in it */
unsigned char *SW_compress2(int num, unsigned char *buffer)
{
    unsigned int rest = (unsigned int) num;
    int          is_last_byte = 1;

    /* Trivial case: 0 */
    if (!rest)
    {
        *buffer-- = 0;
        /* Was "return 0": callers were handed a NULL pointer instead of */
        /* the decreased buffer pointer */
        return buffer;
    }

    /* Any other case ... */
    while (rest)
    {
        *buffer = (unsigned char) (rest & 127);
        /* Only the first byte written (the final byte of the number) */
        /* is left without the continuation bit */
        if (!is_last_byte)
            *buffer |= 128;
        is_last_byte = 0;
        rest >>= 7;
        buffer--;
    }

    return buffer;
}

/* Compress a number and writes it to a buffer */
/* buffer must be previously allocated */
/* returns the incremented buffer pointer after storing the compressed number in it */
unsigned char *compress3(int num, unsigned char *buffer)
{
    unsigned int  rest = (unsigned int) num;
    unsigned char groups[MAXINTCOMPSIZE];
    int           count = 0;

    /* Trivial case: 0 */
    if (!rest)
    {
        *buffer++ = 0;
        return buffer;
    }

    /* Any other case ... */
    /* NOTE(review): assumes MAXINTCOMPSIZE groups cover an int - confirm in swish.h */
    while (rest && count < MAXINTCOMPSIZE)
    {
        groups[count++] = (unsigned char) (rest & 127);
        rest >>= 7;
    }

    while (--count >= 0)
        *buffer++ = (unsigned char) (groups[count] | (count ? 128 : 0));

    return buffer;
}

/* Uncompress a number from a file */
/* f_getc is called once per input byte; progerr() is called if it returns a negative value */
int     uncompress1(FILE * fp, int (*f_getc) (FILE *))
{
    int          _c;
    unsigned int num = 0;   /* unsigned so that the left shifts are well defined */

    do
    {
        _c = (int) f_getc(fp);
        if (_c < 0)
        {
            progerr("_c is < 0 in uncompress1()\n");
        }
        num <<= 7;
        num |= _c & 127;
        /* Stop as soon as the accumulated value is zero */
        if (!num)
            break;
    }
    while (_c & 128);

    return (int) num;
}

/* same routine but this works with a memory forward buffer instead of file */
/* it also increases the buffer pointer */
int     uncompress2(unsigned char **buffer)
{
    int            _c;
    unsigned int   num = 0;   /* unsigned so that the left shifts are well defined */
    unsigned char *p = *buffer;

    do
    {
        _c = (int) ((unsigned char) *p++);
        num <<= 7;
        num |= _c & 127;
        if (!num)
            break;
    }
    while (_c & 128);

    *buffer = p;
    return (int) num;
}

/* Routines to make long integers portable */
/* Returns a value whose in-memory bytes hold num most significant byte first */
unsigned long PACKLONG(unsigned long num)
{
    unsigned long  tmp = 0L;
    unsigned char *s;
    int            sz_long = sizeof(unsigned long);

    if (num && LITTLE_ENDIAN)
    {
        s = (unsigned char *) &tmp;
        while (sz_long)
            *s++ = (unsigned char) ((num >> ((--sz_long) << 3)) & 0xFF);
        return tmp;
    }
    return num;
}

/* Same routine - Packs long in buffer */
/* Writes sizeof(unsigned long) bytes at s */
void    PACKLONG2(unsigned long num, unsigned char *s)
{
    int sz_long = sizeof(unsigned long);

    if (LITTLE_ENDIAN)
    {
        while (sz_long)
            *s++ = (unsigned char) ((num >> ((--sz_long) << 3)) & 0xFF);
    }
    else
    {
        memcpy(s, (unsigned char *) &num, sz_long);
    }
}

/* Reverse of PACKLONG */
unsigned long UNPACKLONG(unsigned long num)
{
    int            sz_long = sizeof(unsigned long);
    unsigned long  tmp = 0;
    unsigned char *s = (unsigned char *) &num;

    if (LITTLE_ENDIAN)
    {
        /* Widen each byte to unsigned long BEFORE shifting.  The byte would */
        /* otherwise be promoted to int, and shifting an int by its width or */
        /* more (or into its sign bit) is undefined; with a 64-bit long the */
        /* upper bytes were lost or sign-extended. */
        while (sz_long)
            tmp += (unsigned long) (*s++) << ((--sz_long) << 3);
        return tmp;
    }
    return num;
}

/* Same macro - UnPacks long from buffer */
/* Reads sizeof(unsigned long) bytes from s */
unsigned long UNPACKLONG2(unsigned char *s)
{
    int           sz_long = sizeof(unsigned long);
    unsigned long tmp = 0;

    if (LITTLE_ENDIAN)
    {
        /* See UNPACKLONG: the byte must be widened before the shift */
        while (sz_long)
            tmp += (unsigned long) (*s++) << ((--sz_long) << 3);
    }
    else
    {
        memcpy((unsigned char *) &tmp, s, sz_long);
    }
    return tmp;
}


/* 2003/10/28 jmruiz - Routines to make file offsets portable */
/* sw_off_t is a type defined in config.h to be 32 or 64 bit */
sw_off_t PACKFILEOFFSET(sw_off_t num)
{
    sw_off_t       tmp = (sw_off_t) 0;
    unsigned char *s;
    int            sz_off_t = sizeof(sw_off_t);

    if (num && LITTLE_ENDIAN)
    {
        s = (unsigned char *) &tmp;
        while (sz_off_t)
            *s++ = (unsigned char) ((num >> (sw_off_t) ((--sz_off_t) << 3)) & (sw_off_t) 0xFF);
        return tmp;
    }
    return num;
}

/* Same routine - Packs file offset into a buffer */
/* Writes sizeof(sw_off_t) bytes at s */
void    PACKFILEOFFSET2(sw_off_t num, unsigned char *s)
{
    int sz_off_t = sizeof(sw_off_t);

    if (LITTLE_ENDIAN)
    {
        while (sz_off_t)
            *s++ = (unsigned char) ((num >> (sw_off_t) ((--sz_off_t) << 3)) & (sw_off_t) 0xFF);
    }
    else
    {
        memcpy(s, (unsigned char *) &num, sz_off_t);
    }
}

/* Routine to unpack a file offset */
sw_off_t UNPACKFILEOFFSET(sw_off_t num)
{
    int            sz_off_t = sizeof(sw_off_t);
    sw_off_t       tmp = (sw_off_t) 0;
    unsigned char *s = (unsigned char *) &num;

    if (LITTLE_ENDIAN)
    {
        while (sz_off_t)
            tmp += (sw_off_t) ((sw_off_t) (*s++) << (sw_off_t) ((--sz_off_t) << 3));
        return tmp;
    }
    return num;
}

/* Same routine - UnPacks file offset from buffer */
/* Reads sizeof(sw_off_t) bytes from s */
sw_off_t UNPACKFILEOFFSET2(unsigned char *s)
{
    int      sz_off_t = sizeof(sw_off_t);
    sw_off_t tmp = (sw_off_t) 0;

    if (LITTLE_ENDIAN)
    {
        while (sz_off_t)
            tmp += (sw_off_t) ((sw_off_t) (*s++) << (sw_off_t) ((--sz_off_t) << 3));
    }
    else
    {
        memcpy((unsigned char *) &tmp, s, sz_off_t);
    }
    return tmp;
}


/***********************************************************************************
*   09/00 Jose Ruiz
*   Function to compress location
data in memory * * Compresses a LOCATION entry * * A single position LOCATION goes from 20 to 3 bytes. * three positions goes from 28 to 5. * ************************************************************************************/ #define IS_FLAG 0x80 /* Binary 10000000 */ #define COMMON_STRUCTURE 0x60 /* Binary 01100000 */ #define COMMON_IN_FILE 0x20 /* Binary 00100000 */ #define COMMON_IN_HTML_BODY 0x40 /* Binary 01000000 */ #define POS_4_BIT 0x10 /* Binary 00010000 */ /************************************************************************ From Jose on Feb 13, 2002 IS_FLAG is to indicate that the byte is a flag. As far as I remember, I needed it to avoid null values. When COMMON_STRUCTURE is on, this means that all the positions have the same structure value. This helps a lot with non html files and can save a lot of space. When FREQ_AND_POS_EQ_1 is on, this means that freq is 1 and pos[0]=1. Mmm, I am not sure if this is very useful now. Let me explain better. This was useful for xml files with fields that contains just one value. For example: 00001 20001231 But, now, I am not sure if this is useful because long time ago I changed the position counter to not be reseted after a each field change. I need to check this. POS_4_BIT indicates that all positions are within 16 positions of each other and can thus be stored as 2 per byte. Position numbers are stored as a delta from the previous position. Here's indexing /usr/doc: 23840 files indexed. 177638538 total bytes. 19739102 total words. Elapsed time: 00:04:42 CPU time: 00:03:09 Indexing done! 4 bit = 843,081 (total length = 10,630,425) 12 bytes/chunk not 4 bit = 13,052,904 (length 83,811,498) 6 bytes/chunk I wonder if storing the initial postion would improve that much. 
*************************************************************************/ void compress_location_values(unsigned char **buf,unsigned char **flagp,int filenum,int frequency, unsigned int *posdata) { unsigned char *p = *buf; unsigned char *flag; int structure = GET_STRUCTURE(posdata[0]); int common_structure = COMMON_STRUCTURE; int i; /* Make room for flag and init it */ flag = p; *flagp = p; p++; *flag = IS_FLAG; /* Add file number */ p = compress3(filenum, p); /* Check for special case frequency == 1 and position[0] < 128 && structure == IN_FILE */ if(frequency == 1 && (GET_POSITION(posdata[0]) < 128) && structure == IN_FILE) { /* Remove IS_FLAG and store position in the lower 7 bits */ /* In this way we have 0bbbbbbb in *flag ** where bbbbbbb is the position and the leading 0 bit ** indicates that frequency is 1 and position is < 128 */ *flag = (unsigned char) ((int)(GET_POSITION(posdata[0]))); } else { /* Otherwise IS_FLAG is set */ /* Now, let's see if all positions have the same structure to ** get better compression */ for(i=1;i 0 ; i--) { posdata[i] = SET_POSDATA(GET_POSITION(posdata[i]) - GET_POSITION(posdata[i-1]),GET_STRUCTURE(posdata[i])); if( GET_POSITION(posdata[i]) >= 16) (*flag) &= ~POS_4_BIT; } /* Always write first position "as is" */ p = compress3(GET_POSITION(posdata[0]), p); /* write the position data starting at 1 */ if((*flag) & POS_4_BIT) { for (i = 1, j = 0; i < frequency ; i++, j++) { if(j % 2) p[j/2] |= (unsigned char) GET_POSITION(posdata[i]); else p[j/2] = (unsigned char) GET_POSITION(posdata[i]) << 4; } p += ((j + 1)/2); } else { for (i = 1; i < frequency; i++) p = compress3(GET_POSITION(posdata[i]), p); } /* Write out the structure bytes */ if(! 
(*flag & COMMON_STRUCTURE)) for(i = 0; i < frequency; i++) *p++ = (unsigned char) GET_STRUCTURE(posdata[i]); *buf = p; } } static unsigned char *compress_location(SWISH * sw, LOCATION * l) { unsigned char *p, *q; int i, max_size; unsigned char *flag; struct MOD_Index *idx = sw->Index; /* check if the work buffer is long enough */ /* just to avoid bufferoverruns */ /* In the worst case and integer will need MAXINTCOMPSIZE bytes */ /* but fortunatelly this is very uncommon */ /* 2002/01 JMRUIZ ** Added an extra byte (MAXINTCOMPSIZE+1) for each position's structure */ max_size = sizeof(unsigned char) + sizeof(LOCATION *) + (((sizeof(LOCATION) / sizeof(int) + 1) + (l->frequency - 1)) * (MAXINTCOMPSIZE + sizeof(unsigned char))); /* reallocate if needed */ if (max_size > idx->len_compression_buffer) { idx->len_compression_buffer = max_size + 200; idx->compression_buffer = erealloc(idx->compression_buffer, idx->len_compression_buffer); } /* Pointer to the buffer */ p = idx->compression_buffer; /* Add extra bytes for handling linked list */ //***JMRUIZ memcpy(p,&l->next,sizeof(LOCATION *)); p += sizeof(LOCATION *); /* Add the metaID */ p = compress3(l->metaID,p); compress_location_values(&p,&flag,l->filenum,l->frequency, l->posdata); compress_location_positions(&p,flag,l->frequency,l->posdata); /* Get the length of all the data */ i = p - idx->compression_buffer; /* Did we underrun our buffer? */ if (i > idx->len_compression_buffer) progerr("Internal error in compress_location routine"); q = (unsigned char *) Mem_ZoneAlloc(idx->currentChunkLocZone, i); memcpy(q, idx->compression_buffer, i); return (unsigned char *) q; } void uncompress_location_values(unsigned char **buf,unsigned char *flag, int *filenum,int *frequency) { unsigned char *p = *buf; *frequency = 0; *flag = *p++; if(!((*flag) & IS_FLAG)) { *frequency = 1; } else (*frequency) |= (*flag) & 15; /* Binary 00001111 */ *filenum = uncompress2(&p); if(! 
(*frequency)) *frequency = uncompress2(&p); *buf = p; } unsigned long four_bit_count = 0; unsigned long four_bit_bytes = 0; unsigned long not_four = 0; unsigned long not_four_bytes = 0; unsigned long four_bit_called = 0; unsigned long not_four_called; void uncompress_location_positions(unsigned char **buf, unsigned char flag, int frequency, unsigned int *posdata) { int i, j, tmp; unsigned char *p = *buf; int common_structure = 0; int structure = 0; /* Check for special case frequency == 1 and position[0] < 128 and structure == IN_FILE */ if (!(flag & IS_FLAG)) { structure = IN_FILE; posdata[0] = SET_POSDATA((int)(flag),structure); } else { /* Check for common structure */ if ((tmp =(flag & COMMON_STRUCTURE))) { common_structure = COMMON_STRUCTURE; switch(tmp) { case COMMON_IN_FILE: structure = IN_FILE; break; case COMMON_IN_HTML_BODY: structure = IN_FILE | IN_BODY; break; default: structure = (int)((unsigned char) *p++); break; } } /* First position is always "as is" */ posdata[0] = (unsigned int)uncompress2(&p); /* Check if positions where stored as two values per byte or the old "compress" style */ if(flag & POS_4_BIT) { for (i = 1, j = 0; i < frequency; i++, j++) { if(j%2) posdata[i] = (unsigned int)((unsigned int)p[j/2] & (unsigned int)0x0F); else posdata[i] = (unsigned int)((unsigned int)p[j/2] >> (unsigned int)4); } p += ((j + 1)/2); } else { for (i = 1; i < frequency; i++) { tmp = uncompress2(&p); posdata[i] = (unsigned int)tmp; } } /* Position were compressed incrementally. 
So restore them */ for(i = 1; i < frequency; i++) posdata[i] += posdata[i-1]; /* Get structure */ for(i = 0; i < frequency; i++) { if(!common_structure) structure = (int)((unsigned char) *p++); posdata[i] = SET_POSDATA(posdata[i],structure); } } /* Update buffer pointer */ *buf = p; } /* 09/00 Jose Ruiz ** Compress all non yet compressed location data of an entry */ void CompressCurrentLocEntry(SWISH * sw, ENTRY * e) { LOCATION *l, *prev, *next, *comp; for(l = e->currentChunkLocationList,prev = NULL ; l != e->currentlocation; ) { next = l->next; comp = (LOCATION *) compress_location(sw, l); if(l == e->currentChunkLocationList) e->currentChunkLocationList =comp; if(prev) memcpy(prev, &comp, sizeof(LOCATION *)); /* Use memcpy to avoid alignment problems */ prev = comp; l = next; } e->currentlocation = e->currentChunkLocationList; } /* 2002/11 jmruiz ** Simple routine to compress worddata using zlib where available ** ** 2004/06 jmruiz ** economic flag is for use less RAM ** Trying to compress worddata needs some extra RAM in order to call ** zlib's compress2 routine because this routine needs a buffer ** for storing the compressed data. ** So, if someone is trying to index in economic mode (-e switch) ** he can experiment the annoying "Out of RAM" message if his computer ** does not have enough RAM for allocating that buffer. ** In order to fix it, I have tried the low level deflate routines of zlib's ** (deflateInit, deflate and deflateEnd) with two local buffers: ** local_buffer_in and local_buffer_out. ** The original data is being copied in chunks to local_buffer_in ** and compressed to local_buffer_out after each call to zlib's deflate ** routine. The compressed chunks are ithen copied to the original worddata ** area taking care of not to overrun the buffer. 
** ** On exit returns the new size of the compressed buffer */ int compress_worddata(unsigned char *wdata,int wdata_size, int economic) { #ifndef HAVE_ZLIB return wdata_size; #else unsigned char *WDataBuf; /* For compressing and uncompressing */ uLongf dest_size; int zlib_status = 0; int off_wdata, len_chunk, off_out; unsigned char local_buffer_out[Z_BUFSIZE];/* Just to avoid emalloc/efree overhead and for deflate method */ unsigned char local_buffer_in[Z_BUFSIZE];/* Just to avoid emalloc/efree overhead*/ /* Don't bother compressing smaller items */ if ( wdata_size < MIN_WORDDATA_COMPRESS_SIZE ) return wdata_size; if(economic) { /* -e switch is set. Use deflate* routines */ z_stream z; /* zlib compression stream */ z.zalloc = (alloc_func)0; /* init zlib compression stream */ z.zfree = (free_func)0; z.opaque = (voidpf)0; if(Z_OK != deflateInit(&z, 9)) return wdata_size; z.avail_in = 0; z.next_out = (Bytef*)local_buffer_out; z.avail_out = Z_BUFSIZE; dest_size = 0; off_wdata = 0; off_out = 0; for(;;) { if (off_wdata == wdata_size) break; /* No more data */ else { if (z.avail_in==0) { /* Fill local_buffer_in with more data */ len_chunk = Min(Z_BUFSIZE,(wdata_size - off_wdata)); if(!len_chunk) /* No more data to compress: exit */ break; memcpy(local_buffer_in,wdata + off_wdata, len_chunk); off_wdata += len_chunk; z.next_in = local_buffer_in; z.avail_in = len_chunk; } } /* Compress local_buffer_in */ /* Z_NO_FLUSH flag achieves better results */ zlib_status = deflate(&z, Z_NO_FLUSH); /* get the size of compressed data */ len_chunk = Z_BUFSIZE - z.avail_out; if(len_chunk) { /* Check for buffer overrun */ if((off_out + len_chunk) >= off_wdata) { /* We are in buffer overrun condition but if we are in ** the first chunk we can recover the original data ** from local_buffer_in */ if(off_wdata <= Z_BUFSIZE) { deflateEnd(&z); memcpy(wdata,local_buffer_in,wdata_size); /* Do nothing - Retains data uncompressed */ return wdata_size; } else progerr("WordData Compression Error. 
Unable to compress worddata in economic mode. Remove switch -e from your command line or add \"CompressPositions Yes\" to your config file"); } /* Copy the compressed data onto the original buffer */ /* off_out contains the current length of the total ** compressed data */ memcpy(wdata + off_out, local_buffer_out, len_chunk); off_out += len_chunk; } /* reset local_buffer_out to next step */ z.next_out = (Bytef*)local_buffer_out; z.avail_out = Z_BUFSIZE; if(zlib_status != Z_OK) break; } /* We have used Z_NO_FLUSH to achieve better ** results. So, we have to issue a deflate with ** Z_FINISH flag to flush the pending data ** in local_buffer_out */ for(;;) { zlib_status = deflate(&z, Z_FINISH); /* get the size of compressed data */ len_chunk = Z_BUFSIZE - z.avail_out; if(len_chunk) { /* Check for buffer overrun */ if((off_out + len_chunk) >= off_wdata) { /* We are in buffer overrun condition but if we are in ** the first chunk we can recover the original data ** from local_buffer_in */ if(off_wdata <= Z_BUFSIZE) { deflateEnd(&z); memcpy(wdata,local_buffer_in,wdata_size); /* Do nothing - Retains data uncompressed */ return wdata_size; } else progerr("WordData Compression Error. Unable to compress worddata in economic mode. Remove switch -e from your command line or add \"CompressPositions Yes\" to your config file"); } /* Copy the compressed data onto the original buffer */ /* off_out contains the current length of the total ** compressed data */ memcpy(wdata + off_out, local_buffer_out, len_chunk); off_out += len_chunk; } /* reset local_buffer_out to next step */ z.next_out = (Bytef*)local_buffer_out; z.avail_out = Z_BUFSIZE; if(zlib_status != Z_OK) break; } deflateEnd(&z); dest_size = off_out; } else { /* Buffer should be +1% + a few bytes. 
*/ dest_size = (uLongf)(wdata_size + ( wdata_size / 100 ) + 1000); // way more than should be needed /* Get an output buffer */ if( dest_size > Z_BUFSIZE ) WDataBuf = (unsigned char *) emalloc((int)dest_size ); else WDataBuf = local_buffer_out; zlib_status = compress2((Bytef *)WDataBuf, &dest_size, wdata, wdata_size, 9); if ( zlib_status != Z_OK ) progerr("WordData Compression Error. zlib compress2 returned: %d Worddata size: %d compress buf size: %d", zlib_status, wdata_size, (int)dest_size); /* Make sure it's compressed enough -- should check that destsize is not > MAXINT */ if ( (int)dest_size < wdata_size ) { memcpy(wdata,WDataBuf,(int)dest_size); } else { dest_size = wdata_size; } if ( WDataBuf != local_buffer_out) efree(WDataBuf); } return (int)dest_size; #endif } /* 2002/11 jmruiz ** Routine to uncompress worddata */ void uncompress_worddata(unsigned char **buf, int *buf_size, int saved_bytes) { #ifdef HAVE_ZLIB unsigned char *new_buf; int zlib_status = 0; uLongf new_buf_size = (uLongf)(*buf_size + saved_bytes); if(! saved_bytes) /* nothing to do */ return; new_buf= (unsigned char *) emalloc(*buf_size + saved_bytes); zlib_status = uncompress(new_buf, &new_buf_size, *buf, (uLongf)buf_size ); if ( zlib_status != Z_OK ) { // $$$ make sure this works ok if returning null $$$ progwarn("Failed to uncompress Property. zlib uncompress returned: %d. uncompressed size: %d buf_len: %d saved_bytes: %d\n", zlib_status, new_buf_size, *buf_size, saved_bytes ); return; } efree(*buf); *buf_size = (int)new_buf_size; *buf = new_buf; #else if ( saved_bytes ) progerr("The index was created with zlib compression.\n This version of swish was not compiled with zlib"); #endif } /* 2002/09 jmruiz ** This routine changes longs in worddata by shorter compressed ** numbers. 
** ** Here are two reasons for using compressed numbers in worddata ** instead of longs: ** - Compressed numbers are more portable: longs are usually 4 bytes ** long in a 32 bit machine but in a 64 bit alpha they are 8 bytes ** long (this a waste of space). ** - The obvious one is that compressed numbers use less disk space ** ** BTW, Any change in worddata will also affect to dump.c, merge.c and search.c ** (getfileinfo routine). ** ** worddata has the following format before entering the routine ** ... ** ** Entering this routine nextposmetaID is the offset to next metaid ** in bytes starting to count them from the begining of worddata. ** It is a packed long number (sizeof(long) bytes). ** ** Exiting this routine, nextposmetaID has changed to be the size of ** the data block and is stored as a compressed number. ** ** In other words, worddata has the following format: ** ... ** */ void remove_worddata_longs(unsigned char *worddata,int *sz_worddata) { unsigned char *src,*dst; //source and dest pointers for worddata unsigned int metaID, tfrequency, data_len; unsigned long nextposmetaID; src = worddata; /* Jump over tfrequency and get first metaID */ tfrequency = uncompress2(&src); /* tfrequency */ metaID = uncompress2(&src); /* metaID */ dst = src; while(1) { /* Get offset to next one */ nextposmetaID = UNPACKLONG2(src); src += sizeof(long); /* Compute data length for this metaID */ data_len = (int)nextposmetaID - (src - worddata); /* Store data_len as a compressed number */ dst = compress3(data_len,dst); /* This must not happen. Anyway check it */ if(dst > src) progerr("Internal error in remove_worddata_longs"); /* dst may be smaller than src. 
So move the data */ memcpy(dst,src,data_len); /* Increase pointers */ src += data_len; dst += data_len; /* Check if we are at the end of the buffer */ if ((src - worddata) == *sz_worddata) break; /* End of worddata */ /* Get next metaID */ metaID = uncompress2(&src); dst = compress3(metaID,dst); } /* Adjust to new size */ *sz_worddata = dst - worddata; }