/*
$Id: index.h,v 1.62 2005/05/12 15:41:05 karman Exp $
**


    This file is part of Swish-e.

    Swish-e is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Swish-e is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along  with Swish-e; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    
    See the COPYING file that accompanies the Swish-e distribution for details
    of the GNU GPL and the special exception available for linking against
    the Swish-e library.
    
** Mon May  9 18:19:34 CDT 2005
** added GPL


*/

#ifndef __HasSeenModule_Index
#define __HasSeenModule_Index       1

struct dev_ino
{
    dev_t   dev;
    ino_t   ino;
    struct dev_ino *next;
};

struct IgnoreLimitPositions
{
    int     n;                  /* Number of entries per file */
    int    *pos;                /* Store metaID1,position1, metaID2,position2 ..... */
};

/* This is used to build a list of the metaIDs that are currently in scope when indexing words */

typedef struct
{
    int    *array;              /* list of metaIDs that need to be indexed */
    int     max;                /* max size of table */
    int     num;                /* number in list */
    int     defaultID;          /* default metaID (should always be one, I suppose) */
}
METAIDTABLE;


/*
   -- module data
*/


struct MOD_Index
{
    /* entry vars */
    METAIDTABLE metaIDtable;
    ENTRYARRAY *entryArray;
    ENTRY  *hashentries[VERYBIGHASHSIZE];
    char    hashentriesdirty[VERYBIGHASHSIZE]; /* just a 0/1 flag */

    /* Compression Work buffer while compression locations in index ** proccess */
    unsigned char *compression_buffer;
    int     len_compression_buffer;

    unsigned char *worddata_buffer;  /* Buffer to store worddata */
    int    len_worddata_buffer;     /* Max size of the buffer */
    int    sz_worddata_buffer;      /* Space being used in worddata_buffer */

    /* File counter */
    int     filenum;

    /* index tmp (both FS and HTTP methods) */
    char   *tmpdir;

    /* Filenames of the swap files */
    char   *swap_location_name[MAX_LOC_SWAP_FILES]; /* Location info file */

    /* handlers for both files */
    FILE   *fp_loc_write[MAX_LOC_SWAP_FILES];       /* Location (writing) */
    FILE   *fp_loc_read[MAX_LOC_SWAP_FILES];        /* Location (reading) */

    struct dev_ino *inode_hash[BIGHASHSIZE];

    /* Buffers used by indexstring */
    int     lenswishword;
    char   *swishword;
    int     lenword;
    char   *word;

    /* Economic mode (-e) */
    int     swap_locdata;       /* swap location data */

    /* Pointer to swap functions */
    sw_off_t    (*swap_tell) (FILE *);
            size_t(*swap_write) (const void *, size_t, size_t, FILE *);
    int     (*swap_seek) (FILE *, sw_off_t, int);
            size_t(*swap_read) (void *, size_t, size_t, FILE *);
    int     (*swap_close) (FILE *);
    int     (*swap_putc) (int, FILE *);
    int     (*swap_getc) (FILE *);

    /* IgnoreLimit option values */
    int     plimit;
    int     flimit;
    /* Number of words from IgnoreLimit */
    int     nIgnoreLimitWords;
    struct swline *IgnoreLimitWords;

    /* Positions from stopwords from IgnoreLimit */
    struct IgnoreLimitPositions **IgnoreLimitPositionsArray;

    /* Index in blocks of chunk_size files */
    int     chunk_size;

    /* Variable to control the size of the zone used for store locations during chunk proccesing */
    int     optimalChunkLocZoneSize;

    /* variable to handle free memory space for locations inside currentChunkLocZone */

    LOCATION *freeLocMemChain;

    MEM_ZONE *perDocTmpZone;
    MEM_ZONE *currentChunkLocZone;
    MEM_ZONE *totalLocZone;
    MEM_ZONE *entryZone;

    int     update_mode;    /* Set to 1 when in update mode */
                            /* Set to 2 when in remove mode */
};

void    initModule_Index(SWISH *);
void    freeModule_Index(SWISH *);
int     configModule_Index(SWISH *, StringList *);


void    do_index_file(SWISH * sw, FileProp * fprop);

ENTRY  *getentry(SWISH * , char *);
void    addentry(SWISH *, ENTRY *, int, int, int, int);

void    addCommonProperties(SWISH * sw, FileProp * fprop, FileRec * fi, char *title, char *summary, int start);


int     getfilecount(IndexFILE *);

int     getNumberOfIgnoreLimitWords(SWISH *);
void    getPositionsFromIgnoreLimitWords(SWISH * sw);

char   *ruleparse(SWISH *, char *);

#define isIgnoreFirstChar(header,c) (header)->ignorefirstcharlookuptable[(int)((unsigned char)c)]
#define isIgnoreLastChar(header,c) (header)->ignorelastcharlookuptable[(int)((unsigned char)c)]
#define isBumpPositionCounterChar(header,c) (header)->bumpposcharslookuptable[(int)((unsigned char)c)]


void    computehashentry(ENTRY **, ENTRY *);

void    sort_words(SWISH *);

int     indexstring(SWISH * sw, char *s, int filenum, int structure, int numMetaNames, int *metaID, int *position);

void    addsummarytofile(IndexFILE *, int, char *);

void    BuildSortedArrayOfWords(SWISH *, IndexFILE *);


void    PrintHeaderLookupTable(int ID, int table[], int table_size, FILE * fp);
void    coalesce_all_word_locations(SWISH * sw, IndexFILE * indexf);
void    coalesce_word_locations(SWISH * sw, ENTRY * e);

void    adjustWordPositions(unsigned char *worddata, int *sz_worddata, int n_files, struct IgnoreLimitPositions **ilp);

#endif