/*********************************************************************/
/*        LINEAR TREE for Supervised Learning                        */
/*        Versao 1.0 (10/12/1997)                                    */
/*        Developed by: Joao Gama                                    */
/*                LIACC - Uni.do Porto                               */
/*                jgama@ncc.up.pt                                    */
/*-------------------------------------------------------------------*/
/*  File: Ltree.c                                                    */
/*********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <values.h>
#include "Ci_instances.h"
#include "utils.h"

/* Single-character markers compared against the first character of a
   field when attribute values are read. */
static char    UNKNOWN  = '?';
static char    DONTCARE = '*';
/* Size of the scratch buffer used when an attribute name is generated. */
#define        MAX_STR_SIZE 5024
/* Character classification helpers.  Both macros evaluate their argument
   more than once, so do not pass expressions with side effects.  The
   argument is fully parenthesised so that compound expressions (e.g. a
   conditional expression) are classified as a whole. */
#define digit(ch)      ((ch) >= '0' && (ch) <= '9')
#define letter(ch)     (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z'))
/************************************************/
/*             Private Methods                  */
/************************************************/
/* Forward declarations of the file-local helpers defined further down:
   instance reading (_ReadCiInstances, ReadAttVal, ReadCiInstance),
   swapping two example slots (CiExchange) and building the domain
   description from pre-split text lines (GenerateDomainInfo,
   Processa_atributo). */
static CiExample **_ReadCiInstances(FILE *fi, DomainInfo *domain, unsigned long *nrexs, unsigned long nr);
static AttrVal *ReadAttVal(FILE *fi, DomainInfo *domain, unsigned long nrex, long line_nr);
static AttrVal *ReadCiInstance(FILE *fi, DomainInfo *domain, long nrex, int *classe);
static void CiExchange(CiExample **examples, unsigned long ex1, unsigned long ex2);
static DomainInfo *GenerateDomainInfo(char ***lines, int *nr_words, int nr_lines, int line, int natt);
static int Processa_atributo(char **lines, int nr_words, int natt, char **name, int *type, void **vals, int *nr_vals);
/************************************************/
/*              Public Methods                  */
/************************************************/
/* Builds a dataset descriptor from an open stream.
   Input:  fi      stream holding the instances
           domain  domain description, stored in the descriptor as is
   Output: newly allocated CiDs whose examples were read starting at
           index 1, or NULL (after a message on stderr) when the
           descriptor itself cannot be allocated. */
CiDs *ReadCiDataset(FILE *fi, DomainInfo *domain)
{
  CiDs *dataset = (CiDs *) malloc(sizeof(CiDs));

  if (dataset == NULL) {
    fprintf(stderr, "ReadCiDataset: Out of memory\n");
    return NULL;
  }
  dataset->examples = _ReadCiInstances(fi, domain, &dataset->nr_exs, 1);
  dataset->domain = domain;
  return dataset;
}

/* Prints a header line followed by one line per example for the
   positions Low..High (inclusive).  High is first limited to the number
   of examples stored in the dataset.  Each line shows the instance id,
   its number of attributes, its weight and its class, followed by the
   attribute values as printed by ShowCiInstance. */
void Show_CiInstances(CiDs *ds, long int Low, long int High)
{
  register unsigned long pos;

  if (High > ds->nr_exs)
    High = ds->nr_exs;

  printf("\nId\tNr.Att\tWeight\tCl\n");

  pos = Low;
  while (pos <= High) {
    printf("%.0f\t%d\t%.3f\t%d\t", Id(Ci_AttVal(ds, pos)), Ci_NrAtts(Ci_Example(ds, pos)), Ci_Weight(Ci_Example(ds, pos)), Ci_Classe(Ci_Example(ds, pos)));
    ShowCiInstance(ds->domain, Ci_AttVal(ds, pos), Ci_NrAtts(Ci_Example(ds, pos)));
    pos++;
  }
}

/* Resizes the attribute vector of an example to nr_att slots.
   Slots with index above the previous attribute count are marked as
   normal values holding 0.0.  On success the example records
   nr_att - 1 attributes and TRUE is returned; when the reallocation
   fails the example is left untouched and FALSE is returned. */
int Ci_ReBuildInstance(CiExample *exemplo, int nr_att)
{
  register int slot;
  AttrVal *resized;

  resized = (AttrVal *) realloc(exemplo->instance, nr_att * sizeof(AttrVal));
  if (resized == NULL)
    return FALSE;

  /* initialise only the slots that did not exist before */
  slot = exemplo->nr_att + 1;
  while (slot < nr_att) {
    TypeOfVal(resized[slot]) = normal;
    resized[slot].val.c = 0.0;
    slot++;
  }

  exemplo->instance = resized;
  exemplo->nr_att = nr_att - 1;
  return TRUE;
}
/************************************************/
/*             Private Methods                  */
/************************************************/
/* Reads every remaining instance from fi and returns them as an array
   indexed from `nr` upwards (the public entry point passes nr = 1).

   The function recurses once per call to ReadCiInstance: the array is
   only allocated when end-of-file is reached, at which point the total
   number of accepted instances (nr - 1) is known; each returning frame
   then stores its own example at index nr.  Recursion depth therefore
   grows with the number of instances in the file.

   Input:  fi      stream positioned at the next instance
           domain  domain description used to decode the fields
           nrexs   receives the number of slots in the array
           nr      index the next accepted instance will occupy
   Output: pointer adjusted so that the first slot is examples[1].
           Terminates the program when the array cannot be allocated. */
static CiExample **_ReadCiInstances(FILE *fi, DomainInfo *domain, unsigned long *nrexs, unsigned long nr)
{
  int classe;
  AttrVal *instance;
  CiExample *example, **examples;
  unsigned long total;

  if (feof(fi)) {
    total = nr - 1;
    /* calloc(0, ...) is allowed to return NULL; always request at least
       one slot so that an empty input is not reported as out of memory */
    if ((examples = (CiExample **) calloc(total ? total : 1, sizeof(CiExample *))) == NULL) {
      fprintf(stderr, "CiReadInstances: Not enough memory\n");
      exit(1);
    }
    *nrexs = total;
    return --examples;   /* shift so that indexing starts at 1 */
  }

  if ((instance = ReadCiInstance(fi, domain, nr, &classe)) == NULL)
    /* rejected or empty record: try again with the same index */
    return _ReadCiInstances(fi, domain, nrexs, nr);

  examples = _ReadCiInstances(fi, domain, nrexs, nr+1);
  if ((example = (CiExample *) malloc(sizeof(CiExample))) != NULL) {
    Ci_Classe(example) = classe;
    Ci_NrAtts(example) = NrAttrs(domain);
    Ci_Weight(example) = 1.0;
    example->instance = instance;
    examples[nr] = example;
  }
  else {
    /* the slot stays NULL (array was zero-filled); do not leak the
       attribute vector that can no longer be attached to an example */
    fprintf(stderr, "CiReadInstances: Not enough memory\n");
    free(instance);
  }
  return examples;
}
/***********************************************************/
/*    Private Methods:  Instances                          */
/***********************************************************/
/* Reads one instance: the attribute values (via ReadAttVal) followed by
   the class field.

   Input:  fi      stream positioned at the next instance
           domain  domain description
           nrex    number given to the instance (used as its id and in
                   diagnostics)
           classe  receives the class id, or -1 when no instance is
                   returned
   Output: the attribute vector, or NULL when the attributes could not
           be read or the class field is missing.

   A call counter kept in a static variable is reported as the line
   number in diagnostics.

   NOTE(review): IdValLbl returns 0, not -1, for a label it does not
   know, so an unrecognised class label is accepted here with class 0.
   Confirm whether that is intended before tightening the check. */
static AttrVal *ReadCiInstance(FILE *fi, DomainInfo *domain, long nrex, int *classe)
{
  char      *value;
  AttrVal   *instance;
  static long     line_nr = 0;

  *classe = -1;
  if ((instance = ReadAttVal(fi, domain, nrex, line_nr)) != NULL) {
    if ((value = ReadField(fi, ",\t ")) != NULL)
      *classe = IdValLbl(domain, NrAttrs(domain)+1, value);
    else
      while ((value = ReadField(fi, ",\t ")) != NULL);
    if (*classe == -1) {
      /* value is NULL when the class field is absent; never hand a null
         pointer to %s */
      fprintf(stderr, "ReadAttVal: Instance %ld (Line %ld) Invalid Classe value: %s\n", nrex, line_nr, value != NULL ? value : "(missing)");
      /* the instance is being rejected, so release its attribute vector */
      free(instance);
      instance = NULL;
    }
  }
  ++line_nr;
  return instance;
}

/* Reads the attribute fields of one instance into a freshly allocated
   vector with 1 + NrAttrs(domain) slots; attribute i is stored in
   slot i and the instance id (nrex) is stored through Id().

   A field starting with the UNKNOWN or DONTCARE marker is flagged as
   such and its continuous value set to MINFLOAT.  Otherwise the field
   is decoded according to the attribute type:
     continuous        atof of the field
     integer           atoi of the field; a diagnostic is printed when
                       the text is not among the attribute's labels
     ordered, nominal  id of the label; when the label is not found the
                       value is flagged unknown and a diagnostic printed

   Input:  fi, domain  source stream and domain description
           nrex        instance number (stored as id, used in messages)
           line_nr     line number used in messages only
   Output: the vector, or NULL when allocation fails or fewer fields
           than attributes were available (the vector is freed). */
static AttrVal *ReadAttVal(FILE *fi, DomainInfo *domain, unsigned long nrex, long line_nr)
{
  register int    i;
  int             pos = 0;
  char            *value;
  AttrVal         *attr_val = (AttrVal *) calloc(1+NrAttrs(domain), sizeof(AttrVal));

  if (attr_val == NULL) {
    fprintf(stderr, "ReadAttVal: Out of Memory\n");
    return NULL;
  }
  Id(attr_val) = (ContType) nrex;

  /* pos only ever receives results of IdValLbl, which reports failure
     as 0, so the pos != -1 guard does not currently stop the loop */
  for (i = 1; i <= NrAttrs(domain) && pos != -1; i++) {
    if ((value = ReadField(fi, ",\t ")) == NULL)
      break;                      /* record ended early */

    if (*value == UNKNOWN) {
      TypeOfVal(attr_val[i]) = unknown;
      attr_val[i].val.c = MINFLOAT;
    }
    else if (*value == DONTCARE) {
      TypeOfVal(attr_val[i]) = dontcare;
      attr_val[i].val.c = MINFLOAT;
    }
    else {
      switch (CiTypeAttr(domain, i)) {
      case continuous:
        attr_val[i].val.c = atof(value);
        break;
      case integer:
        attr_val[i].val.d = atoi(value);
        /* nrex is unsigned long: print it with %lu */
        if ((pos = IdValLbl(domain, i, value)) == 0)
          fprintf(stderr, "ReadAttVal: Instance %lu (Line %ld) Invalid attribute ( %d )value: %s\n", nrex, line_nr, i, value);
        break;
      case ordered:
      case nominal:
        pos = IdValLbl(domain, i, value);
        if (pos)
          attr_val[i].val.d = pos;
        else {
          TypeOfVal(attr_val[i]) = unknown;
          fprintf(stderr, "ReadAttVal: Instance %lu (Line %ld) Invalid attribute ( %d )value: %s\n", nrex, line_nr, i, value);
        }
        break;
      default:
        break;
      }
    }
  }

  if (i > NrAttrs(domain))
    return attr_val;              /* every attribute was read */

  /* i == 1 means nothing at all was read: stay silent in that case */
  if (i > 1)
    fprintf(stderr, "ReadAttVal: Instance %lu (Line %ld) Invalid number of attributes\n", nrex, line_nr);
  free(attr_val);
  return NULL;
}

/* Prints the values of attributes 1..nr_att of an instance on one line,
   each followed by a tab, and ends the line.  Nothing is printed when
   instance is NULL.

   Normal values are printed according to the attribute type
   (continuous as %6.3f, integer as %d, ordered/nominal as the label
   of the stored id).  Unknown values are shown as "?" and dontcare
   values as "*", the same markers that are recognised when instances
   are read, so the two kinds can be told apart in the output. */
void ShowCiInstance(DomainInfo *domain, AttrVal *instance, int nr_att)
{
  register int j;

  if (instance == NULL)
    return;

  for (j = 1; j <= nr_att; j++) {
    switch (TypeOfVal(instance[j])) {
    case normal:
      switch (CiTypeAttr(domain, j)) {
      case continuous:
        printf("%6.3f\t", instance[j].val.c);
        break;
      case integer:
        printf("%d\t", instance[j].val.d);
        break;
      case ordered:
      case nominal:
        printf("%s\t", LblValId(domain, j, instance[j].val.d));
        break;
      }
      break;
    case unknown:
      printf("?\t");
      break;
    case dontcare:
      printf("*\t");
      break;
    }
  }
  printf("\n");
}
/**************************************/
/*   Public Methods  for SORT         */
/**************************************/
/***************************************************
   Goal:  Sort the examples in positions [Low .. High]
          by increasing value of attribute Att
   Input: Dataset
          Attribute
          Limits of the range to sort
   Output:  always TRUE
   Remarks: The value of the example at Low is used as threshold.
            A first pass moves every example with value <= threshold
            to the front of the range; a second pass gathers the
            examples equal to the threshold at the end of that front
            part.  The examples below the threshold and the examples
            above it are then sorted recursively.
****************************************************/
int CiQuickSort(CiDs *ds, int Att, unsigned long Low, unsigned long High)
{
  register unsigned long i, Middle, EqStart;
  double Thresh, value;
  enum AttrTypes tipo;

  tipo = CiTypeAttr(ds->domain, Att);
  if (Low >= High)
    return TRUE;

  /* read the threshold the same way every other value is read, so it is
     defined for every attribute type */
  Thresh = tipo == continuous ? CValAttEx(Ci_AttVal(ds, Low), Att) : (double) DValAttEx(Ci_AttVal(ds, Low), Att);

  /* pass 1: [Low .. Middle-1] <= Thresh, [Middle .. High] > Thresh */
  Middle = Low;
  for (i = Low; i <= High; i++) {
    value = tipo == continuous ? CValAttEx(Ci_AttVal(ds, i), Att) : (double) DValAttEx(Ci_AttVal(ds, i), Att);
    if (value <= Thresh) {
      if (i != Middle) CiExchange(Ci_Examples(ds), Middle, i);
      Middle++;
    }
  }
  if (Middle == Low)
    return TRUE;                  /* nothing compared <= threshold */

  /* pass 2: walk [Low .. Middle-1] downwards and pack the examples equal
     to the threshold into [EqStart .. Middle-1].  EqStart is an
     exclusive bound and the loop tests before decrementing, so neither
     index is ever decremented below Low (safe even for Low == 0). */
  EqStart = Middle;
  for (i = Middle; i-- > Low; ) {
    value = tipo == continuous ? CValAttEx(Ci_AttVal(ds, i), Att) : (double) DValAttEx(Ci_AttVal(ds, i), Att);
    if (value == Thresh) {
      EqStart--;
      if (i != EqStart) CiExchange(Ci_Examples(ds), EqStart, i);
    }
  }

  /* examples strictly below the threshold, if any */
  if (EqStart > Low)
    CiQuickSort(ds, Att, Low, EqStart - 1);
  /* examples strictly above the threshold */
  CiQuickSort(ds, Att, Middle, High);
  return TRUE;
}
/***************************************************
   Goal:  Move to the front of [Low .. High] every example whose
          discrete value for attribute Att equals a given value
   Input: Dataset
          Attribute
          Value to look for; for integer attributes this is the
          index of a label, and the number written in that label
          is what gets compared
          Limits of examples
   Output:  Position following the last example moved to the front
            (Low when no example matches)
****************************************************/
long int CiJoinValues(CiDs *ds, int Att, int value, unsigned long Low, unsigned long High)
{
  register unsigned long scan, next;

  if (CiTypeAttr(ds->domain, Att) == integer)
    value = atoi(ValsAttr(ds->domain,Att)[value]);

  next = Low;
  scan = Low;
  while (scan <= High) {
    if (DValAttEx(Ci_AttVal(ds, scan), Att) == value) {
      if (scan != next)
        CiExchange(Ci_Examples(ds), next, scan);
      next++;
    }
    scan++;
  }
  return next;
}
/***************************************************
   Goal:  Move to the front of [Low .. High] every example whose
          value for attribute Att is not a normal value
   Input: Dataset
          Attribute
          Limits of examples
   Output:  Position following the last example moved to the front
            (Low when every value is normal)
****************************************************/
long int CiJoinUnknowns(CiDs *ds, int Att, unsigned long Low, unsigned long High)
{
  register unsigned long scan, next;

  next = Low;
  scan = Low;
  while (scan <= High) {
    if (!NormalVal(Ci_AttVal(ds, scan)[Att])) {
      if (scan != next)
        CiExchange(Ci_Examples(ds), next, scan);
      next++;
    }
    scan++;
  }
  return next;
}

/***************************************************
   Goal:  Exchange the examples in positions Low .. High-1 with the
          examples found at Pos, Pos-1, Pos-2, ...
   Input: Dataset
          Attribute (not used by this routine)
          Limits of the examples to move (High itself is excluded)
          Position of the first exchange partner
   Output:  Value of Pos after the last exchange
****************************************************/
long int CiMoveUnknowns(CiDs *ds, int Att, unsigned long Low, unsigned long High,unsigned long Pos)
{
  register unsigned long src = Low;

  while (src < High) {
    CiExchange(Ci_Examples(ds), src, Pos);
    Pos--;
    src++;
  }
  return Pos;
}
/***************************************************
   Goal:  Cut point (quantitative attributes)
   Input: Dataset
          Attribute
          Limits of examples
          Splitting value
   Output:  Position just before the first example in [Low .. High]
            whose value for Att is not below the splitting value
            (High when every example is below it)
   Remarks: Assumes the examples are ordered by attribute value
****************************************************/
long int CiSplitingPosition(CiDs *ds, int Att, unsigned long Low, unsigned long High, double value)
{
  register unsigned long pos = Low;

  if (CiTypeAttr(ds->domain, Att) == continuous) {
    while (pos <= High && value > CValAttEx(Ci_AttVal(ds, pos), Att))
      pos++;
  }
  else {
    while (pos <= High && value > DValAttEx(Ci_AttVal(ds, pos), Att))
      pos++;
  }
  return pos - 1;
}

/***************************************************
   Goal:  Swap the examples stored in two positions
   Input: Array of pointers to examples
          Positions ex1 and ex2 to be swapped
   Output: void
****************************************************/
static void CiExchange(CiExample **examples, unsigned long ex1, unsigned long ex2)
{
  CiExample *held = examples[ex2];

  examples[ex2] = examples[ex1];
  examples[ex1] = held;
}
/***************************************************
   Goal:  Read the domain description stored in file Name+Ext
   Input: Root name of the problem
          Extension of the domain file
   Output:  Domain description with root_name pointing at Name,
            or NULL when the file cannot be opened
   Remarks: The file is read with ":" as field separator and every
            line is handed to GenerateDomainInfo.
            NOTE(review): the file name built by new_strcat and the
            line table returned by SReadFile are not released here;
            confirm who owns them.
****************************************************/
DomainInfo *_ReadDomain(char *Name, char *Ext)
{
  DomainInfo      *Info;
  int             *nr_words;
  long            nr_lines;
  char            ***lines;
  char            *path = new_strcat(Name, Ext);
  FILE            *f = fopen(path, "r");

  if (f == NULL)
    return NULL;

  lines = SReadFile(f, &nr_lines, &nr_words, ":");
  Info = GenerateDomainInfo(lines, nr_words, nr_lines, 1, 0);
  Info->root_name = Name;
  fclose(f);
  return Info;
}

/* Builds the domain description from the pre-split lines of a domain
   file.

   The function recurses over the lines: each line accepted by
   Processa_atributo increases the attribute count.  When the last line
   has been passed the DomainInfo and its attribute table are allocated
   for the final count, and each returning frame then fills in the
   entry of its own attribute (entries are indexed from 1).

   Input:  lines, nr_words  fields of every line and how many there are
           nr_lines         index of the last line
           line             line to process in this call
           natt             attributes accepted so far
   Output: the domain description.  Terminates the program when memory
           cannot be obtained, like the instance reader does. */
static DomainInfo *GenerateDomainInfo(char ***lines, int *nr_words, int nr_lines, int line, int natt)
{
  char *name = NULL;
  void *vals = NULL;
  int type = 0, nr_vals = 0;
  DomainInfo *Info = NULL;
  AttrInfo   *ats = NULL;

  if (line > nr_lines) {
    Info = (DomainInfo *) malloc(sizeof(DomainInfo));
    /* calloc(0, ...) may return NULL, so always request at least one
       entry; the table is still addressed as attrs[1 .. natt] */
    ats = (AttrInfo *) calloc(natt > 0 ? natt : 1, sizeof(AttrInfo));
    if (Info == NULL || ats == NULL) {
      fprintf(stderr, "ReadDomain: Not enough memory\n");
      exit(1);
    }
    Info->nr_attrs = natt;
    Info->attrs = ats - 1;        /* shift so that indexing starts at 1 */
    return Info;
  }

  if (Processa_atributo(lines[line], nr_words[line], natt, &name, &type, &vals, &nr_vals) == FALSE)
    /* line rejected: it does not consume an attribute number */
    return GenerateDomainInfo(lines, nr_words, nr_lines, line + 1, natt);

  natt++;
  Info = GenerateDomainInfo(lines, nr_words, nr_lines, line + 1, natt);
  Info->attrs[natt].name = name;
  Info->attrs[natt].type_attr = type;
  Info->attrs[natt].nr_vals = nr_vals;
  Info->attrs[natt].vals = vals;
  return Info;
}

/* Decodes one line of the domain file, already split into nr_words
   fields (lines[1] .. lines[nr_words]).

   Accepted layouts:
     1 field   values only; the attribute is nominal and is named
               "attr<natt+1>"
     2 fields  name + type (continuous, nominal or integer), or
               name + values; in the latter case the attribute is
               integer when all_integers accepts the values (which are
               then sorted with compare_chars) and nominal otherwise
     3 fields  name + type (nominal, ordered or integer) + values;
               integer values are sorted with compare_chars

   Input:  lines, nr_words  fields of the line
           natt             attributes accepted before this line
   Output: TRUE when the line describes an attribute, in which case
           *name, *type, *vals and *nr_vals are filled in (the latter
           two only when the line carries values); FALSE when the line
           is rejected.  A line whose field count is not 1, 2 or 3, or
           whose name/type field contains no token, is rejected instead
           of being accepted with unset outputs.

   NOTE(review): with 3 fields, a type word other than nominal, ordered
   or integer is accepted without a diagnostic and leaves *type, *vals
   and *nr_vals as the caller initialised them. */
static int Processa_atributo(char **lines, int nr_words, int natt, char **name, int *type, void **vals, int *nr_vals)
{
  int count = 0;
  char generated[MAX_STR_SIZE], **tokens;

  switch (nr_words) {

  /*===== values restriction only =====*/
  case 1:
    sprintf(generated, "attr%d", natt + 1);
    /* this split is only used to find out whether the line has any token */
    tokens = v_split(lines[1], ", \t:; ", &count);
    if (!count)
      return FALSE;
    *name = new_strcpy(generated);
    *type = nominal;
    *vals = v_split(lines[1], ",; \t", nr_vals);
    return TRUE;

  /*===== name + type  OR   name + values  restriction =====*/
  case 2:
    tokens = v_split(lines[1], ",; \t", &count);
    if (count < 1) {
      fprintf(stderr, "ReadDomain: Error on attribute %d. Missing attribute name.\n", natt + 1);
      return FALSE;
    }
    *name = tokens[1];
    tokens = v_split(lines[2], ",; \t", &count);
    if (count == 1) {             /* name + type */
      if (!strcmp(tokens[1], "continuous"))
        *type = continuous;
      else if (!strcmp(tokens[1], "nominal"))
        *type = nominal;
      else if (!strcmp(tokens[1], "integer"))
        *type = integer;
      else if (!strcmp(tokens[1], "ordered")) {
        fprintf(stderr, "ReadDomain: Error on attribute %s. Ordered attributes need values restriction.\n",*name);
        return FALSE;
      }
      else {
        fprintf(stderr, "ReadDomain: Error on attribute %s. Unknown type, %s.\n", *name, tokens[1]);
        return FALSE;
      }
    }
    else {                        /* name + values restriction (nominal or integer attr) */
      *nr_vals = count;
      if (all_integers(tokens, *nr_vals)) {
        *type = integer;
        qsort(tokens + 1, *nr_vals, sizeof(char *), compare_chars);
      }
      else
        *type = nominal;
      *vals = tokens;
    }
    return TRUE;

  /*===== name + type + values restriction =====*/
  case 3:
    tokens = v_split(lines[1], ",; \t", &count);
    if (count < 1) {
      fprintf(stderr, "ReadDomain: Error on attribute %d. Missing attribute name.\n", natt + 1);
      return FALSE;
    }
    *name = tokens[1];
    tokens = v_split(lines[2], ",; \t", &count);
    if (count < 1) {
      fprintf(stderr, "ReadDomain: Error on attribute %s. Missing type.\n", *name);
      return FALSE;
    }
    if (!strcmp(tokens[1], "nominal")) {
      *type = nominal;
      *vals = v_split(lines[3], ",; \t", nr_vals);
    }
    else if (!strcmp(tokens[1], "ordered")) {
      *type = ordered;
      *vals = v_split(lines[3], ",; \t", nr_vals);
    }
    else if (!strcmp(tokens[1], "integer")) {
      *type = integer;
      tokens = v_split(lines[3], ",; \t", nr_vals);
      qsort(tokens + 1, *nr_vals, sizeof(char *), compare_chars);
      *vals = tokens;
    }
    return TRUE;

  /*===== anything else cannot describe an attribute =====*/
  default:
    if (nr_words > 3)
      fprintf(stderr, "ReadDomain: Error on attribute %d. Too many fields (%d).\n", natt + 1, nr_words);
    return FALSE;
  }
}

/* =================================================================
   PRINTS A DOMAIN DESCRIPTION ON STANDARD OUTPUT: THE PROBLEM NAME
   AND NUMBER OF ATTRIBUTES, THEN FOR EVERY ATTRIBUTE ITS NUMBER,
   NAME, TYPE AND (WHEN IT HAS ANY) THE LIST OF ITS VALUES.
   ----------------------------------------------------------------- */
void ShowDomain(DomainInfo *D)
{
  register int att, k;
  const char *type_line;

  printf("\nPROBLEM NAME :: %s\nNR.ATTRIBUTES :: %d\n\n",D->root_name,D->nr_attrs);
  ForAllAttributes(D,att) {
    printf("Attribute n.%d : %s\n",att,NameAttr(D,att));
    printf("\tType : ");

    /* pick the text for the type; nothing is printed for a type that
       matches none of the four tests */
    type_line = NULL;
    if (NomAttr(D,att))
      type_line = "nominal\n";
    else if (ContAttr(D,att))
      type_line = "continuous\n";
    else if (OrdAttr(D,att))
      type_line = "ordered\n";
    else if (IntAttr(D,att))
      type_line = "integer\n";
    if (type_line != NULL)
      fputs(type_line, stdout);

    if (NValsAttr(D,att) > 0) {
      printf("\tNr.Different Values : %d\n",NValsAttr(D,att));
      for (k = 1; k <= NValsAttr(D,att); k++) {
        if (k == 1)
          printf("\tValues :: %s",LblValId(D,att,k));
        else
          printf(", %s",LblValId(D,att,k));
      }
      printf("\n");
    }
    printf("\n");
  }
}

/* =================================================================
   OBTAINS THE INTERNAL ID OF AN ATTRIBUTE VALUE.
   RETURNS THE POSITION (COUNTED FROM 1) OF THE LABEL EQUAL TO Value
   AMONG THE VALUES OF THE ATTRIBUTE, OR 0 WHEN THE ATTRIBUTE IS
   CONTINUOUS OR NO LABEL MATCHES.
   ----------------------------------------------------------------- */
unsigned int IdValLbl(DomainInfo *D, unsigned int IdAttr, char *Value)
{
  unsigned int id;
  char **labels;

  if (ContAttr(D,IdAttr))
    return 0;

  labels = (char **) D->attrs[IdAttr].vals;
  id = 1;
  while (id <= NValsAttr(D,IdAttr)) {
    if (strcmp(labels[id], Value) == 0)
      return id;
    id++;
  }
  return 0;
}
/* =================================================================
   OBTAINS THE EXTERNAL LABEL THAT CORRESPONDS TO AN INTERNAL
   VALUE ID OF AN ATTRIBUTE.  THE ID IS USED DIRECTLY AS INDEX INTO
   THE VALUE TABLE OF THE ATTRIBUTE; NO RANGE CHECK IS MADE.
   ----------------------------------------------------------------- */
char *LblValId(DomainInfo *D, unsigned int IdAttr, unsigned int IdVal)
{
  return ((char **) D->attrs[IdAttr].vals)[IdVal];
}

/* =================================================================
   OBTAINS THE INTERNAL ID CORRESPONDING TO AN ATTRIBUTE NAME.
   RETURNS 0 WHEN NO ATTRIBUTE HAS THAT NAME.
   ----------------------------------------------------------------- */
unsigned int IdAttr(DomainInfo *D, char *name)
{
  int att;

  ForAllAttributes(D,att) {
    if (strcmp(name, NameAttr(D,att)) == 0)
      return(att);
  }
  return 0;
}




