/*
$Id: result_sort.c,v 1.89 2005/05/12 15:41:08 karman Exp $
**

    This file is part of Swish-e.

    Swish-e is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Swish-e is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along  with Swish-e; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    
    See the COPYING file that accompanies the Swish-e distribution for details
    of the GNU GPL and the special exception available for linking against
    the Swish-e library.
    
    
** Mon May  9 12:52:52 CDT 2005 -- added GPL


** Move index presorting code to pre_sort.c  - Sept 2002 - moseley
**
*/

#include "config.h"
#include "swish.h"
#include "swstring.h"
#include "mem.h"
#include "merge.h"
#include "list.h"
#include "search.h"
#include "docprop.h"
#include "metanames.h"
#include "compress.h"
#include "error.h"
#include "db.h"
#include "parse_conffile.h"
#include "swish_qsort.h"
#include "result_sort.h"
#include "array.h"
#include "rank.h"

// #define DEBUGSORT 1


/*****************************************************************************
* compare_results_single_index - qsort compare function
*
*  compares properties for a single index which means the results
*  share the same parent db_results structure and associated SortData.
*
*  Will lazy load properties as needed when primary key matches
*
*  Just to keep things confusing, +1 = desc sort, -1 = ascending
*  qsort's output is then reversed when rebuilding the linked list of results.
*
*
*****************************************************************************/
static int compare_results_single_index(const void *s1, const void *s2)
{
    RESULT    *r1 = *(RESULT * const *) s1;
    RESULT    *r2 = *(RESULT * const *) s2;
    int       i;
    int       rc;
    int       num_fields      = r1->db_results->num_sort_props;
    SortData  *sort_data;
    int       *presorted;

    for (i = 0; i < num_fields; i++)
    {
        sort_data = &r1->db_results->sort_data[i];


        /* special case of sorting by (raw) rank */
        if ( sort_data->is_rank_sort )
        {
            if ( (rc = r1->rank - r2->rank) )
                return ( rc * sort_data->direction );
            else
                continue;
        }


        /* If haven't checked this property for a pre-sorted table then try to load the table */
        if ( !sort_data->property->sorted_loaded )
            LoadSortedProps( r1->db_results->indexf, sort_data->property );


        /* can we compare with pre-sorted numbers? (i.e. is the presorted data (sorted_data) loaded into the metaEntry?) */


        if ( (presorted = sort_data->property->sorted_data) )
        {
            if ( (rc = DB_ReadSortedData( presorted, r1->filenum - 1) -  DB_ReadSortedData( presorted, r2->filenum - 1)) )
                return ( rc * sort_data->direction );  /* is the multiplication slow? */
        }


        else /* must compare properties directly */
        {
            /* First, does an array exist to hold the pointers to the properties? */
            if ( !sort_data->key )
            {
                sort_data->key = (propEntry **)emalloc( r1->db_results->result_count * sizeof( propEntry *) );
                memset( sort_data->key, -1, r1->db_results->result_count * sizeof( propEntry *) );
            }


            /* Now load the properties if they do not already exist (a -1 pointer indicates undefined) */
            /* tfrequency is used to store the index into the key (propery key) array */

            if ( sort_data->key[ r1->tfrequency ] == (propEntry *)-1 )
                sort_data->key[ r1->tfrequency ] = getDocProperty( r1, &sort_data->property, 0, sort_data->property->sort_len );

            if ( sort_data->key[ r2->tfrequency ] == (propEntry *)-1 )
                sort_data->key[ r2->tfrequency ] = getDocProperty( r2, &sort_data->property, 0, sort_data->property->sort_len );

            /* finally compare the properties */
            if ( (rc = Compare_Properties(  sort_data->property, sort_data->key[ r1->tfrequency ], sort_data->key[ r2->tfrequency ]) ) )
                return ( rc * sort_data->direction );
        }

    }
    return 0;
}
/*****************************************************************************
* compare_results
*
*  This code is used when tape-merging multiple indexes.  Almost the same
*  as above, but may be comparing results from different indexes must
*  associate data with each result and can not use the presorted index tables.
*
*  Must make sure that the sort keys are the same for each index
*  (e.g. same prop type, type of case compare)
*
*
*****************************************************************************/
int compare_results(const void *s1, const void *s2)
{
    RESULT    *r1 = *(RESULT * const *) s1;
    RESULT    *r2 = *(RESULT * const *) s2;
    int       i;
    int       rc;
    int       num_fields      = r1->db_results->num_sort_props;
    SortData  *sort_data1;
    SortData  *sort_data2;

    for (i = 0; i < num_fields; i++)
    {
        sort_data1 = &r1->db_results->sort_data[i];
        sort_data2 = &r2->db_results->sort_data[i];


        /* special case of sorting by (raw) rank */
        /* this is a bit more questionable because comparing raw ranks between two */
        /* indexes may not work well -- probably ok now with simple word-count based ranking */

        /* very unlikely that there is a property "swishrank" that was not the rank, so don't check both */
        if ( sort_data1 ->is_rank_sort )
        {
            if ( (rc = r1->rank - r2->rank) )
                return ( rc * sort_data1->direction );
            else
                continue;
        }


        /* First, does an array exist to hold the pointers to the properties for each result? */
        if ( !sort_data1->key )
        {
            sort_data1->key = (propEntry **)emalloc( r1->db_results->result_count * sizeof( propEntry *) );
            memset( sort_data1->key, -1, r1->db_results->result_count * sizeof( propEntry *) );
        }

        if ( !sort_data2->key )
        {
            sort_data2->key = (propEntry **)emalloc( r2->db_results->result_count * sizeof( propEntry *) );
            memset( sort_data2->key, -1, r2->db_results->result_count * sizeof( propEntry *) );
        }


        /* Now load the properties if they do not already exist (a -1 pointer indicates undefined) */

        if ( sort_data1->key[ r1->tfrequency ] == (propEntry *)-1 )
            sort_data1->key[ r1->tfrequency ] = getDocProperty( r1, &sort_data1->property, 0, sort_data1->property->sort_len );

        if ( sort_data2->key[ r2->tfrequency ] == (propEntry *)-1 )
            sort_data2->key[ r2->tfrequency ] = getDocProperty( r2, &sort_data2->property, 0, sort_data2->property->sort_len );

        /* finally compare the properties */
        if ( (rc = Compare_Properties(  sort_data1->property, sort_data1->key[ r1->tfrequency ], sort_data2->key[ r2->tfrequency ]) ) )
           return ( rc * sort_data1->direction );

    }
    return 0;
}


/*******************************************************************
*   Loads metaentry->sorted_data with sorted array for the given metaEntry
*
*   Call with:
*       *sw
*       *indexf
*       *metaEntry - meta entry in question
*
*   Returns:
*       pointer to an array of int (metaentry->sorted_data)
*
*   Notes:
*       This is also called by proplimit and merge code
*       And it's name sucks.
*
********************************************************************/
int    *LoadSortedProps(IndexFILE * indexf, struct metaEntry *m)
{
    unsigned char *buffer;
    int     sz_buffer;

    if ( m->sorted_loaded )
        return m->sorted_data;

    m->sorted_loaded = 1;  /* flag that we tried to load the data */

    DB_InitReadSortedIndex(indexf->sw, indexf->DB);

    /* Get the sorted index of the property */

    /* Convert to a property index */
    DB_ReadSortedIndex(indexf->sw, m->metaID, &buffer, &sz_buffer, indexf->DB);

#ifdef USE_PRESORT_ARRAY
    m->sorted_data = (int *)buffer;
    DB_EndReadSortedIndex(indexf->sw, indexf->DB);
    return m->sorted_data;

#endif


    /* If a table was found, then uncompress */
    /* FIX $$$ This should be in db_native.c */

    if (sz_buffer)
    {
        unsigned char *s = buffer;
        int j;

        m->sorted_data = (int *) emalloc(indexf->header.totalfiles * sizeof(int));

        /* Unpack / decompress the numbers */
        for (j = 0; j < indexf->header.totalfiles; j++)
            m->sorted_data[j] = uncompress2(&s);

        efree(buffer);
    }


    DB_EndReadSortedIndex(indexf->sw, indexf->DB);
    return m->sorted_data;
}


/***************************************************************************************
* sort_single_index_results
*
*   Call with
*       DB_RESULTS
*
*   Returns:
*       total results
*
*   This does all the work.
*       - initializes an array of arrays to hold properties
*       - pre-load the array[0] elements with properties, if needed
*         (i.e. when the first sort key is non-presorted
*
*   Todo:
*       This runs through the list results a number of times (plus qsort)
*
****************************************************************************************/
static int sort_single_index_results( DB_RESULTS *db_results )
{
    int lookup_props = 0;  /* flag if we need to load props initially */
    int results_in_index = 0;
    RESULT  *cur_result;
    RESULT **sort_array;
    SortData *sort_data;

    /* Any results to process? */
    if( !db_results->resultlist )
        return 0;


    /* sanity checks */

    /* Should check for too big, too? */
    if ( db_results->num_sort_props < 1 )
        progerr("called sort_single_index_results with invalid number of sort keys");

    if ( ! db_results->sort_data )
        progerr("called sort_single_index_results without a vaild sort_data struct");


    /* Need to tally up the number of results in this set */
    /* $$$$ can search.c do this when creating results? It's an extra loop */
    /* perhaps it can't be done in search.c -- need to set an index number on the result */

    cur_result = db_results->resultlist->head;

    while ( cur_result )
    {
        /* Set an index number which can be used to point into the sort_data->key array. */
        /* tfrequency is not after rank calculations (or at all?) */

        cur_result->tfrequency = results_in_index++;

        cur_result = cur_result->next;
    }

    db_results->result_count = results_in_index;  /* needed so we know how big to create arrays */


    /* Do we need to lookup properties for the first key? */
    /* Not if sorting by rank (that's in the result) or if there's presorted data available */

    sort_data = &db_results->sort_data[0];


    if ( !sort_data->is_rank_sort && !sort_data->property->sorted_data )
        if ( !LoadSortedProps( db_results->indexf, sort_data->property ) )    /* can we load the array? */
        {
            /* otherwise, we must read all the properties off disk */

            lookup_props = 1;


            /* Create the array the size of the number of results to hold *propEntry's */
            /* This array sticks around until freeing the results object */

            sort_data->key = (propEntry **)emalloc( db_results->result_count * sizeof( propEntry *) );
            memset( sort_data->key, -1, db_results->result_count * sizeof( propEntry *) );
        }


    /* Now build an array to hold the results for sorting */

    sort_array = (RESULT **) emalloc(db_results->result_count * sizeof(RESULT *));


    /* Fill the array for qsort -- and load the properties, if they are needed */
    /* This could be optimized for rank, since that's in the result structure */
    /* but would add complexity to the sort functions -- need to benchmark or profile */

    cur_result = db_results->resultlist->head;


    while ( cur_result )
    {
        sort_array[ cur_result->tfrequency ] = cur_result;

        /* If can't use the presorted numbers ("meta->sorted_data") then load the properties */
        if ( lookup_props )
            sort_data->key[ cur_result->tfrequency ] =
                getDocProperty( cur_result, &sort_data->property, 0, sort_data->property->sort_len );

        cur_result = cur_result->next;
    }

    /* Sort them */
    swish_qsort(sort_array, db_results->result_count, sizeof(RESULT *), compare_results_single_index);


    /* Build the list -- the list is in reverse order, so build the list backwards */
    {
        RESULT *head = NULL;
        int j;

        for (j = 0; j < db_results->result_count; j++)
        {
            RESULT *r = sort_array[j];


            /* Now's a good time to normalize the rank as we are processing each result */


            /* Find the largest rank for scaling */
            if (r->rank > db_results->results->bigrank)
                db_results->results->bigrank = r->rank;


            if ( !head )             // first time
            {
                head = r;
                r->next = NULL;
            }
            else                    // otherwise, place this at the head of the list
            {
                r->next = head;
                head = r;
            }

        }
        db_results->sortresultlist = head;
        db_results->resultlist->head = head;
        db_results->currentresult = head;


    }


    /* Free the memory of the array */
    efree( sort_array );


    return db_results->result_count;
}


/***************************************************************************************
* sortresults - sorts the results for one or more indexes searched
*
*   Call with
*       A RESULTS_OBJECT which contains a list of DB_RESULTS
*
*   Returns:
*       total results
*
*   This simply loops through the indexex, and then scales the rank
*
*   rewritten Sept 2002 - moseley / again July 2003
*
****************************************************************************************/

int  sortresults(RESULTS_OBJECT *results)
{
    int         TotalResults = 0;
    DB_RESULTS *db_results = results->db_results;

    while ( db_results )
    {
        TotalResults += sort_single_index_results( db_results );
        db_results = db_results->next;
    }

    /* set rank scaling factor based on the largest rank found of all results */

    if (results->bigrank)
        results->rank_scale_factor = 10000000 / results->bigrank;
    else
        results->rank_scale_factor = 10000;


    return TotalResults;
}