/*
    This file is part of canberra.
    
    This code is written by Giuseppe Jurman <jurman@itc.it>
    and Davide Albanese <albanese@fbk.it> (Python interface).
    (C) 2008 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <Python.h>
#include <numpy/arrayobject.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <numpysupport.h>

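/* Mean Canberra distance indicator on top-k sublists.
 *
 * The helper sums defined first (harm, e_harm, o_harm, a_harm,
 * exact_canberra) support the computation; the inputs listed here are
 * those of canberra_location(), defined further down.
 *
 * Input:
 * nl    - number of lists
 * ne    - number of elements for each list
 * lists - lists matrix (nl x ne)
 * k     - top-k sublists
 *
 * Output:
 * indicator - mean Canberra distance indicator
 */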


/* Harmonic number H(n) = 1 + 1/2 + ... + 1/n.
 *
 * Terms are accumulated in increasing order of the denominator.
 * Returns 0.0 when n < 1, because the sum is then empty.
 */
double harm(long n)
{
  double total = 0.0;
  long denom = 1;

  while (denom <= n)
    {
      total += 1.0 / (double)denom;
      denom++;
    }

  return total;
}

/* Even part of the harmonic sum: 0.5 * H(floor(n/2)), which equals the
 * sum of 1/j over the even j in [2, n].
 */
double e_harm(long n)
{
  /* floor() yields a double; storing it in a long performs the same
     conversion that passing it straight to harm(long) would. */
  long half = floor((double)n / 2.0);

  return 0.5 * harm(half);
}

/* Odd part of the harmonic sum: H(n) - 0.5 * H(floor(n/2)), which equals
 * the sum of 1/j over the odd j in [1, n].
 */
double o_harm(long n)
{
  long half = floor((double)n / 2.0);
  double all_terms = harm(n);
  double even_terms = 0.5 * harm(half);

  return all_terms - even_terms;
}

/* Parity-matched harmonic sum: o_harm(n) when n is odd, e_harm(n) when
 * n is even.
 */
double a_harm(long n)
{
  if (n % 2 != 0)
    return o_harm(n);

  return e_harm(n);
}

/* Closed-form "exact" Canberra value for ne elements and top-k sublists.
 *
 * The Python wrappers in this file divide the measured indicator by this
 * value to normalize it.
 *
 * ne - number of elements of each list (used as a divisor, so it must
 *      not be zero)
 * k  - size of the top-k sublists
 */
double exact_canberra(long ne, long k)
{
  double acc = 0.0;
  long rank = 1;

  /* Weighted sum of parity-matched harmonic differences over ranks 1..k. */
  while (rank <= k)
    {
      acc += rank * (a_harm(2*k-rank) - a_harm(rank));
      rank++;
    }

  /* Kept as a single expression so the floating-point evaluation is the
     same as it has always been. */
  return 2.0/ne * acc + (2.0*(ne-k)/ne) * (2*(k+1) * (harm(2*k+1)-harm(k+1))-k);
}

/***** Only for canberra_quotient() *****/
/* Closed-form helper term used by canberra_quotient():
 *   (s + 1/2)^2 * H(2s+1) - H(s)/8 - (2s^2 + s + 1)/4
 * where H is the harmonic number computed by harm().
 */
double xi(long s)
{
  const double shifted = s + 0.5;
  const double h_odd = harm(2*s+1);
  const double h_s = harm(s);

  return shifted*shifted*h_odd - 0.125*h_s - 0.25*(2.0*s*s+s+1.0);
}

/* Closed-form helper term used by canberra_quotient():
 *   (s-k)(s+k+1)/2 * H(s+k+1) + k(k+1)/2 * H(k+1) + s(2k-s-1)/4
 * where H is the harmonic number computed by harm().
 */
double eps(long k, long s)
{
  const double h_sum = harm(s+k+1);
  const double h_k = harm(k+1);

  return 0.5*(s-k)*(s+k+1.0)*h_sum + 0.5*k*(k+1)*h_k + 0.25*s*(2.0*k-s-1.0);
}

/* Sum of the Canberra terms |c - i| / (c + i) for i = a, a+1, ..., b.
 *
 * Returns 0.0 when a > b (empty range).  No guard is applied against
 * c + i == 0.
 */
double delta(long a, long b, long c)
{
  double total = 0.0;
  long pos = a;

  while (pos <= b)
    {
      total += (double)fabs(c-pos)/(double)(c+pos);
      pos++;
    }

  return total;
}
/***************************************/


/* Mean Canberra distance indicator on top-k sublists.
 *
 * Every stored position is shifted to a 1-based rank and clamped to k+1,
 * so all elements outside the top k share the same rank.  For each
 * unordered pair of lists the Canberra distance of the clamped ranks is
 * computed and recorded.
 *
 * nl    - number of lists
 * ne    - number of elements in each list
 * lists - nl row pointers, each to ne positions
 * k     - size of the top-k sublists
 * i1    - output: index of the first list of each pair
 * i2    - output: index of the second list of each pair
 * dist  - output: Canberra distance of each pair
 *
 * i1, i2 and dist receive one entry per pair, i.e. nl*(nl-1)/2 entries.
 *
 * Returns the mean of the pairwise distances (0.0 when nl < 2).
 */
double canberra_location(long nl, long ne, long **lists, long k, long *i1, long *i2, double *dist)
{
  const long cap = k + 1;
  double indicator = 0.0;
  long pair = 0;
  long a, b, e;

  for (a = 0; a < nl - 1; a++)
    {
      for (b = a + 1; b < nl; b++)
        {
          double distance = 0.0;

          for (e = 0; e < ne; e++)
            {
              long ra = lists[a][e] + 1;
              long rb = lists[b][e] + 1;

              if (ra > cap)
                ra = cap;
              if (rb > cap)
                rb = cap;

              distance += fabs(ra-rb) / (ra+rb);
            }

          i1[pair] = a;
          i2[pair] = b;
          dist[pair] = distance;
          pair++;

          /* Each pair contributes distance / (number of pairs). */
          indicator += 2.0 * distance / (nl*(nl-1));
        }
    }

  return indicator;
}


/* Average number of present elements per list.
 *
 * An entry counts as present when it is greater than -1.
 *
 * nl    - number of lists
 * ne    - number of elements in each list
 * lists - nl row pointers, each to ne positions
 *
 * Returns the mean count over the nl lists (0.0 when nl is 0).
 */
double average_partial_list(long nl, long ne, long **lists)
{
  double mean = 0.0;
  long row, col;

  for (row = 0; row < nl; row++)
    {
      const long *ranks = lists[row];
      double present = 0.0;

      for (col = 0; col < ne; col++)
        present += (ranks[col] > -1) ? 1.0 : 0.0;

      mean += present / nl;
    }

  return mean;
}


/* Normalizing factor 1 - a / b, where a = exact_canberra(nm, nm) and
 * b = exact_canberra(ne, ne).
 *
 * ne - number of elements of each list
 * nm - average length of the partial lists; the parameter is a long, so
 *      the double average passed by callers in this file is converted
 *      (truncated) by C before it gets here
 */
double normalizer(long ne, long nm)
{
  double partial = exact_canberra(nm, nm);
  double full = exact_canberra(ne, ne);

  return (1.0 - partial / full);
}


/* Mean Canberra distance indicator on generic (possibly partial) lists.
 *
 * An entry of -1 marks an element that is not present in a list.  For
 * every unordered pair of lists the function sums the Canberra terms of
 * the elements present in both, then adds closed-form correction terms
 * built from delta(), eps() and xi() that depend on the two list lengths
 * and on the number of elements absent from both.
 *
 * nl        - number of lists
 * ne        - number of elements in each list
 * lists     - nl row pointers, each to ne positions
 * complete  - when 1, add the term for the elements absent from both lists
 * normalize - when 1, divide the result by
 *             normalizer(ne, average_partial_list(nl, ne, lists))
 * i1        - output: index of the first list of each pair
 * i2        - output: index of the second list of each pair
 * dist      - output: distance of each pair (never normalized)
 *
 * i1, i2 and dist receive one entry per pair, i.e. nl*(nl-1)/2 entries.
 *
 * Returns the mean of the pairwise distances (0.0 when nl < 2, before the
 * optional normalization).
 *
 * The common elements are visited in one pass over the lists, so no
 * scratch buffer is allocated per pair; the accumulation order is by
 * increasing element index.
 */
double canberra_quotient(long nl, long ne, long **lists, long complete, long normalize, long *i1, long *i2, double *dist)
{
  long i, idx1, idx2, count;
  long t1, t2;
  long p, l1, l2, l1tmp, l2tmp;
  double distance, indicator, tmp2, tmp3;
  long *list1, *list2;
  long unused;
  double A;
  double nm;


  p = ne;
  indicator = 0.0;
  count = 0;
  
  for(idx1 = 1; idx1 <= nl-1; idx1++){
    
    /* Number of present elements in the first list of the pair. */
    l1tmp = 0;
    for(i = 1; i <= ne; i++)
      if(lists[(idx1-1)][i-1] > -1)
        l1tmp++;
    
    for(idx2 = idx1+1; idx2 <= nl; idx2++)
      {
        /* Number of present elements in the second list of the pair. */
        l2tmp = 0;
        for(i = 1; i <= ne; i++)
          if(lists[(idx2-1)][i-1] > -1)
            l2tmp++;
        
        /* Order the pair so that list1 is the shorter one (l1 <= l2). */
        if(l1tmp<=l2tmp){
          list1=lists[idx1-1];
          list2=lists[idx2-1];
          l1=l1tmp;
          l2=l2tmp;
        }else{
          list2=lists[idx1-1];
          list1=lists[idx2-1];
          l1=l2tmp;
          l2=l1tmp;
        }
        
        unused = 0;
        distance = 0.0;
        tmp2 = 0.0;
        tmp3 = 0.0;
        for(i = 0; i < ne; i++)
          {
            if(list1[i] > -1 && list2[i] > -1)
              {
                /* Element present in both lists: 1-based ranks. */
                t1 = list1[i] + 1;
                t2 = list2[i] + 1;
                distance += fabs(t1-t2) / (t1+t2);
                tmp2 += delta(l2+1, p, t1);
                tmp3 += delta(l1+1, p, t2);
              }
            else if(list1[i] == -1 && list2[i] == -1)
              {
                /* Element absent from both lists. */
                unused++;
              }
          }
        
        /* Each correction is skipped when its denominator would be zero. */
        if(p!=l2)
          distance += 1.0 / (p-l2) *
            (-tmp2 + l1*(p-l2) - 2.0*eps(p,l1) + 2.0*eps(l2,l1));

        if(p!=l1)
          distance += 1.0 / (p-l1) *
            (-tmp3 + (p-l1)*l1 - 2.0*eps(p,l1) + 2.0*eps(l1,l1) +
             2.0 * (xi(l2) - xi(l1)) -
             2.0 * (eps(l1,l2) - eps(l1,l1) + eps(p,l2) - eps(p,l1)) +
             (p+l1) * (l2-l1) + l1*(l1+1.0) - l2*(l2+1.0));

        if(p!=l1 && p!=l2 && complete == 1)
          {
            A = (1.0 * unused) / ((p - l1) * (p - l2));
            
            distance +=  A * (2.0 * xi(p) - 2.0 * xi(l2)
                              - 2.0 * eps(l1, p) + 2.0 * eps(l1, l2)
                              - 2.0 * eps(p, p) + 2.0 * eps(p, l2) + 
                              (p + l1) * (p - l2) + l2 * (l2 + 1.0) - p *(p + 1.0));
          }

        i1[count] = idx1 - 1;
        i2[count] = idx2 - 1;
        dist[count] = distance;
        count++;

        /* Each pair contributes distance / (number of pairs). */
        indicator += 2.0 * distance / (nl * (nl - 1)) ;
      }
  }

  if(normalize == 1)
    {
      nm = average_partial_list(nl, ne, lists);
      indicator /= normalizer(ne, nm);
    }

  return indicator;
}


/* Python entry point: canberra(lists, k, dist=False).
 *
 * Converts `lists` to a 2D array of C longs, runs canberra_location() on
 * it and divides the indicator by exact_canberra(ne, k).
 *
 * Returns a float, or the tuple (float, i1, i2, dist) when the `dist`
 * argument is the True singleton; i1/i2 are the list indexes of each pair
 * and dist the per-pair distances (not normalized).
 *
 * Raises ValueError when `lists` is not 2D or when k is outside
 * (0, lists.shape[1]].  Returns NULL with an exception set on any failure.
 */
static PyObject *canberracore_canberra(PyObject *self, PyObject *args, PyObject *keywds)
{
  PyObject *lists = NULL; PyObject *listsa = NULL;
  PyObject *i1_a = NULL; PyObject *i2_a = NULL; PyObject *dist_a = NULL;
  int k;
  PyObject *dist = Py_False;

  /* Parse Tuple*/
  static char *kwlist[] = {"lists", "k", "dist", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, keywds, "Oi|O", kwlist, &lists, &k, &dist))
    return NULL;

  listsa = PyArray_FROM_OTF(lists, NPY_LONG, NPY_IN_ARRAY);
  if (listsa == NULL) return NULL;

  /* Both dimensions are read below, so anything but a 2D array must be
     rejected before PyArray_DIM(listsa, 1) is evaluated. */
  if (PyArray_NDIM(listsa) != 2){
    PyErr_SetString(PyExc_ValueError, "lists must be a 2D array");
    Py_DECREF(listsa);
    return NULL;
  }
  
  /* Check k */
  if (k > PyArray_DIM(listsa, 1) || k <= 0){
    PyErr_SetString(PyExc_ValueError, "k must be in (0, lists.shape[1]]");
    Py_DECREF(listsa);   /* do not leak the converted array */
    return NULL;
  }
 
  int nl = PyArray_DIM(listsa, 0);
  int ne = PyArray_DIM(listsa, 1);

  /* NOTE(review): lmatrix_from_numpy() comes from numpysupport.h; it is
     assumed to return a malloc'ed table of row pointers (it is released
     with free() below) and NULL on allocation failure -- confirm. */
  long **_lists = lmatrix_from_numpy(listsa);
  if (_lists == NULL && nl > 0){
    Py_DECREF(listsa);
    return PyErr_NoMemory();
  }

  /* One output slot per unordered pair of lists; the product is computed
     in npy_intp so it cannot overflow int for large nl. */
  npy_intp o_dims[1];
  o_dims[0] = ((npy_intp) nl * (nl - 1)) / 2;
  
  i1_a = PyArray_SimpleNew(1, o_dims, NPY_LONG);
  if (i1_a != NULL)
    i2_a = PyArray_SimpleNew(1, o_dims, NPY_LONG);
  if (i2_a != NULL)
    dist_a = PyArray_SimpleNew(1, o_dims, NPY_DOUBLE);
  if (dist_a == NULL){
    /* One of the allocations failed; the exception is already set. */
    Py_XDECREF(i1_a);
    Py_XDECREF(i2_a);
    free(_lists);
    Py_DECREF(listsa);
    return NULL;
  }
  
  long *i1_v     = (long *) PyArray_DATA(i1_a);
  long *i2_v     = (long *) PyArray_DATA(i2_a);
  double *dist_v = (double *) PyArray_DATA(dist_a);
  
  double distance = canberra_location(nl, ne, _lists, k, i1_v, i2_v, dist_v);
  double exact    = exact_canberra(ne, k);
   
  double distnorm = distance / exact;

  free(_lists);
  Py_DECREF(listsa);

  /* "N" hands the references to the arrays over to the result tuple. */
  if (dist == Py_True)
    return Py_BuildValue("d, N, N, N", distnorm, i1_a, i2_a, dist_a);
  else
    {
      Py_DECREF(i1_a);
      Py_DECREF(i2_a);
      Py_DECREF(dist_a);
      return Py_BuildValue("d", distnorm);
    }
  
}


/* Python entry point: canberraq(lists, complete=True, normalize=False,
 * dist=False).
 *
 * Converts `lists` to a 2D array of C longs, runs canberra_quotient() on
 * it and divides the indicator by exact_canberra(ne, ne).  `complete`,
 * `normalize` and `dist` are enabled only by the True singleton.
 *
 * Returns a float, or the tuple (float, i1, i2, dist) when `dist` is
 * True; i1/i2 are the list indexes of each pair and dist the per-pair
 * distances (not normalized).
 *
 * Raises ValueError when `lists` is not 2D.  Returns NULL with an
 * exception set on any failure.
 */
static PyObject *canberracore_canberraq(PyObject *self, PyObject *args, PyObject *keywds)
{
  PyObject *lists = NULL; PyObject *listsa = NULL;
  PyObject *i1_a = NULL; PyObject *i2_a = NULL; PyObject *dist_a = NULL;
  PyObject *complete = Py_True;
  PyObject *normalize = Py_False;
  PyObject *dist = Py_False;

  int c;
  int n;

  /* Parse Tuple*/
  static char *kwlist[] = {"lists", "complete", "normalize", "dist", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|OOO", kwlist, &lists, 
                                   &complete, &normalize, &dist))
    return NULL;

  listsa = PyArray_FROM_OTF(lists, NPY_LONG, NPY_IN_ARRAY);
  if (listsa == NULL) return NULL;

  /* Both dimensions are read below, so anything but a 2D array must be
     rejected before PyArray_DIM(listsa, 1) is evaluated. */
  if (PyArray_NDIM(listsa) != 2){
    PyErr_SetString(PyExc_ValueError, "lists must be a 2D array");
    Py_DECREF(listsa);
    return NULL;
  }
  
  int nl = PyArray_DIM(listsa, 0);
  int ne = PyArray_DIM(listsa, 1);

  /* NOTE(review): lmatrix_from_numpy() comes from numpysupport.h; it is
     assumed to return a malloc'ed table of row pointers (it is released
     with free() below) and NULL on allocation failure -- confirm. */
  long **_lists = lmatrix_from_numpy(listsa);
  if (_lists == NULL && nl > 0){
    Py_DECREF(listsa);
    return PyErr_NoMemory();
  }
 
  if (complete == Py_True) c = 1;
  else c = 0;
  
  if (normalize == Py_True) n = 1;
  else n = 0;

  /* One output slot per unordered pair of lists; the product is computed
     in npy_intp so it cannot overflow int for large nl. */
  npy_intp o_dims[1];
  o_dims[0] = ((npy_intp) nl * (nl - 1)) / 2;
  
  i1_a = PyArray_SimpleNew(1, o_dims, NPY_LONG);
  if (i1_a != NULL)
    i2_a = PyArray_SimpleNew(1, o_dims, NPY_LONG);
  if (i2_a != NULL)
    dist_a = PyArray_SimpleNew(1, o_dims, NPY_DOUBLE);
  if (dist_a == NULL){
    /* One of the allocations failed; the exception is already set. */
    Py_XDECREF(i1_a);
    Py_XDECREF(i2_a);
    free(_lists);
    Py_DECREF(listsa);
    return NULL;
  }
  
  long *i1_v     = (long *) PyArray_DATA(i1_a);
  long *i2_v     = (long *) PyArray_DATA(i2_a);
  double *dist_v = (double *) PyArray_DATA(dist_a);

  double distance = canberra_quotient(nl, ne, _lists, c, n,  i1_v, i2_v, dist_v);
  double exact    = exact_canberra(ne, ne);
  double distnorm = distance / exact;

  free(_lists);
  Py_DECREF(listsa);

  
  /* "N" hands the references to the arrays over to the result tuple. */
  if (dist == Py_True)
    return Py_BuildValue("d, N, N, N", distnorm, i1_a, i2_a, dist_a);
  else
    {
      Py_DECREF(i1_a);
      Py_DECREF(i2_a);
      Py_DECREF(dist_a);
      return Py_BuildValue("d", distnorm);
    }
}


/* Python entry point: normalizer(lists).
 *
 * Converts `lists` to a 2D array of C longs and returns the tuple
 * (nm, nf): nm is average_partial_list() of the lists and nf is
 * normalizer(ne, nm).
 *
 * Raises ValueError when `lists` is not 2D.  Returns NULL with an
 * exception set on any failure.
 */
static PyObject *canberracore_normalizer(PyObject *self, PyObject *args, PyObject *keywds)
{
  PyObject *lists  = NULL; PyObject *listsa = NULL;

  static char *kwlist[] = {"lists", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, keywds, "O", kwlist, &lists))
    return NULL;

  listsa = PyArray_FROM_OTF(lists, NPY_LONG, NPY_IN_ARRAY);
  if (listsa == NULL) return NULL;

  /* Both dimensions are read below, so anything but a 2D array must be
     rejected before PyArray_DIM(listsa, 1) is evaluated. */
  if (PyArray_NDIM(listsa) != 2){
    PyErr_SetString(PyExc_ValueError, "lists must be a 2D array");
    Py_DECREF(listsa);
    return NULL;
  }
  
  int nl = PyArray_DIM(listsa, 0);
  int ne = PyArray_DIM(listsa, 1);

  /* NOTE(review): lmatrix_from_numpy() comes from numpysupport.h; it is
     assumed to return a malloc'ed table of row pointers and NULL on
     allocation failure -- confirm. */
  long **_lists = lmatrix_from_numpy(listsa);
  if (_lists == NULL && nl > 0){
    Py_DECREF(listsa);
    return PyErr_NoMemory();
  }
 
  double nm = average_partial_list(nl, ne, _lists);
  double nf = normalizer(ne, nm);

  /* Release the row-pointer table, as the other wrappers in this file
     do; without this it leaked on every call. */
  free(_lists);
  Py_DECREF(listsa);
  return Py_BuildValue("(d, d)", nm, nf);

}


/* Doc strings: */
/* Python docstring of canberra(); documents every keyword the wrapper
   accepts, including the optional `dist` flag. */
static char canberracore_canberra_doc[] = 
"Compute mean Canberra distance indicator on top-k sublists.\n"
"Positions must be in [0, #elems-1].\n\n"
"Input\n"
"  * *lists* - lists          [2D numpy array integer]\n"
"  * *k*     - top-k sublists [integer]\n"
"  * *dist*  - also return the pairwise distances [True or False]\n\n"
"Output\n"
"  * canberra distance, or (canberra distance, i1, i2, dist) if *dist*\n"
"    is True: i1[j], i2[j] are the indexes of the j-th pair of lists\n"
"    and dist[j] is the (non-normalized) distance of that pair\n\n"
">>> from numpy import *\n"
">>> from mlpy import *\n"
">>> lists = array([[2,4,1,3,0],  # positions, first list\n"
"...                [3,4,1,2,0],  # positions, second list\n"
"...                [2,4,3,0,1],  # positions, third list\n"
"...                [0,1,4,2,3]]) # positions, fourth list\n"
">>> canberra(lists, 3)\n"
"1.0861983059292479"
;

/* Python docstring of canberraq(); documents every keyword the wrapper
   accepts, including the optional `dist` flag. */
static char canberracore_canberraq_doc[] = 
"Compute mean Canberra distance indicator on generic lists.\n"
"Positions must be in [-1, #elems-1], where -1 indicates features\n"
"not present in the list.\n\n"
"Input\n"
"  * *lists*     - lists          [2D numpy array integer]\n"
"  * *complete*  - complete       [True or False]\n"
"  * *normalize* - normalize      [True or False]\n"
"  * *dist*      - also return the pairwise distances [True or False]\n\n"
"Output\n"
"  * canberra distance, or (canberra distance, i1, i2, dist) if *dist*\n"
"    is True: i1[j], i2[j] are the indexes of the j-th pair of lists\n"
"    and dist[j] is the (non-normalized) distance of that pair\n\n"
">>> from numpy import *\n"
">>> from mlpy import *\n"
">>> lists = array([[2,-1,1,-1,0],  # positions, first list\n"
"...                [3,4,1,2,0],    # positions, second list\n"
"...                [2,-1,3,0,1],   # positions, third list\n"
"...                [0,1,4,2,3]])   # positions, fourth list\n"
">>> canberraq(lists)\n"
"1.0628570368721744"
;

/* Python docstring of normalizer().  The "Input" heading ends with a
   newline so it no longer runs into the first bullet. */
static char canberracore_normalizer_doc[] = 
"Compute the average length of the partial lists (nm) and the corresponding\n"
"normalizing factor (nf) given by 1 - a / b where a is the exact value computed\n"
"on the average length and b is the exact value computed on the whole set of\n"
"features.\n\n"
"Input\n"
"  * *lists*    - lists [2D numpy array integer]\n\n"
"Output\n"
"  * (nm, nf)"
;


/* Docstring passed to Py_InitModule3() for the module object. */
static char module_doc[] = "Canberra core module";

/* Method table: maps each Python-visible function name to its C wrapper
 * and docstring.  Every wrapper takes (self, args, keywds), so each entry
 * is flagged METH_VARARGS | METH_KEYWORDS and cast to PyCFunction.  The
 * all-NULL entry terminates the table.
 */
static PyMethodDef canberracore_methods[] = {
  {"canberra",
   (PyCFunction)canberracore_canberra,
   METH_VARARGS | METH_KEYWORDS,
   canberracore_canberra_doc},
  {"canberraq",
   (PyCFunction)canberracore_canberraq,
   METH_VARARGS | METH_KEYWORDS,
   canberracore_canberraq_doc},
  {"normalizer",
   (PyCFunction)canberracore_normalizer,
   METH_VARARGS | METH_KEYWORDS,
   canberracore_normalizer_doc},
  {NULL, NULL, 0, NULL}
};

/* Init */
void initcanberracore()
{
  Py_InitModule3("canberracore", canberracore_methods, module_doc);
  import_array();
}
