

#include "Starter.h"

extern int size_seeds;
extern char * prefix_trashable;

Starter::Starter(char *starter_sequence){fragment = new Fragment(starter_sequence);}
Starter::Starter(Fragment *fragment):fragment(fragment){}
Starter::~Starter(){
    delete fragment;
    for(int i=0;i<nb_substarters;i++)
        free(substarters_sequences[i]);
    free(substarters_sequences);
    free(right_kmers);
    free(left_kmers);
}


static char ** remove_redondancies (char ** extensions, int * number_of_extensions){
    if(*number_of_extensions<2) return extensions;
    extensions=sort_strings(extensions,*number_of_extensions);
    
    int i;
    int idx=1; // we keep the first extension, so we start at 1.
    
    for (i=1;i<(*number_of_extensions);i++)
        if(strcmp(extensions[i-1],extensions[i])!=0){ // store extensions[i] as it is different from extensions[i-1]
            extensions[idx++]=extensions[i];
        }
    
    *number_of_extensions=idx;
    extensions = (char **) realloc(extensions,sizeof(char *)*idx);
    return extensions;
}

void Starter::generate_substarter_no_mapping(Extension_Bank *extension_bank, const int size_kmers){
    
    char * kmer;// = (char *) malloc (sizeof(char)*(size_seeds+1)); test_alloc(kmer);
    right_kmers = (int *) malloc (sizeof(int)); test_alloc(right_kmers);
    left_kmers = (int *) malloc (sizeof(int)); test_alloc(left_kmers);
    substarters_sequences = (char **) malloc (sizeof(char *)); // a unique substarter
    test_alloc(substarters_sequences);
    substarters_sequences[0] = strdup(fragment->fragment_sequence);
    nb_substarters=1;
    // get the last kmer
    kmer=strndup(fragment->fragment_sequence+strlen(fragment->fragment_sequence)-size_kmers,size_kmers);
    // add it in the extension_bank of kmers to be extended
    right_kmers[0]=extension_bank->maybe_add_a_new_extension(kmer);
    free(kmer);
    // get the first kmer
    kmer=strndup(fragment->fragment_sequence,size_kmers);
    // reverse complement it (left extension = right extension of a reverse complemented kmer)
    revcomp(kmer,size_kmers);
    //   printf("left revcomp kmer = %s\n", kmer); // DEB
    // add it in the extension_bank of kmers to be extended
    left_kmers[0]=extension_bank->maybe_add_a_new_extension(kmer);
    free(kmer);
    
    
}

int Starter::generate_substarters(Extension_Bank * extension_bank, const int substitutions_allowed, const int min_coverage, const int error_correction_threshold, const int size_kmers){
    if(fragment->Mapped==NULL){
        // no read was mapped on this starter.
        substarters_sequences=NULL;
        nb_substarters=0;
        
    }
    else{
        substarters_sequences = read_coherent_homologs(fragment->Mapped,
                                                       fragment->fragment_sequence,
                                                       substitutions_allowed,
                                                       min_coverage,
                                                       error_correction_threshold,
                                                       size_kmers,
                                                       &nb_substarters);
        
        substarters_sequences = remove_redondancies(substarters_sequences, &nb_substarters);
    }
    right_kmers = (int *) malloc (sizeof(int)*nb_substarters); test_alloc(right_kmers);
    left_kmers = (int *) malloc (sizeof(int)*nb_substarters); test_alloc(left_kmers);
    if(nb_substarters) fragment->read_coherent=true;
    else fragment->read_coherent=false;
    
//    fprintf(stderr,"\nstarter_sequence = %s has %d substarters \n", fragment->fragment_sequence, nb_substarters);
    
    char * kmer;// = (char *) malloc (sizeof(char)*(size_seeds+1)); test_alloc(kmer);
    
    for( int i=0;i<nb_substarters;i++ ){
        // printf("substarter %s\n", substarters_sequences[i]); //DEB
        // get the last kmer
        kmer=strndup(substarters_sequences[i]+strlen(substarters_sequences[i])-size_kmers,size_kmers);
        // add it in the extension_bank of kmers to be extended
        right_kmers[i]=extension_bank->maybe_add_a_new_extension(kmer);
        free(kmer);
        // get the first kmer
        kmer=strndup(substarters_sequences[i],size_kmers);
        // reverse complement it (left extension = right extension of a reverse complemented kmer)
        revcomp(kmer,size_kmers);
        //   printf("left revcomp kmer = %s\n", kmer); // DEB
        // add it in the extension_bank of kmers to be extended
        left_kmers[i]=extension_bank->maybe_add_a_new_extension(kmer);
        free(kmer);
    }
    
    return nb_substarters;
}


//TODO : bank-> all extension
id_els Starter::starter_output_graph(Extension_Bank *bank, GraphOutput graph, id_els first_id_els){ 
//  printf("OUTPUT GRAPHS\n");
    long extremKmerRightId = 0;
    long extremKmerLeftId = 0;
    
     
    // graph.print_edges_end();
  // for each substarter:
  for(int i=0;i<nb_substarters;i++){
    long first_extending_left_node;
    long first_extending_right_node;
    //bool newExtensionRight = false;
    //bool newExtensionLeft = false;
   // bool writeHead = false;

    // RIGHT KMER
    // test if the extension was not allready created. In this case  bank->all_extensions[right_kmers[i]].node_id>=0

    if(bank->all_extensions[right_kmers[i]].node_id<0){ // This extension has never been seen before
      // set the node_id
      bank->all_extensions[right_kmers[i]].node_id=first_id_els.node;
      first_extending_right_node=first_id_els.node; 
      string right_kmer =  bank->all_extensions[right_kmers[i]].starting_kmer;
      graph.load_nodes_extremities((char *)(prefix_trashable+right_kmer).c_str());
      first_id_els=graph.construct_graph(prefix_trashable+right_kmer, "RIGHT");
      graph.first_id_els=first_id_els;
      //graph.print_head(bank->all_extensions[right_kmers[i]].node_id, bank->all_extensions[left_kmers[i]].node_id);
      //writeHead = true;
      //newExtensionRight = true;
      extremKmerRightId++;     
    }    
    else first_extending_right_node = bank->all_extensions[right_kmers[i]].node_id;

    // LEFT KMER
    // test if the extension was not allready created. In this case  bank->all_extensions[right_kmers[i]].node_id>=0 
    if(bank->all_extensions[left_kmers[i]].node_id<0){ // This extension has never been seen before
      // set the node_id
      bank->all_extensions[left_kmers[i]].node_id=first_id_els.node;
      first_extending_left_node=first_id_els.node; 
      string left_kmer =  bank->all_extensions[left_kmers[i]].starting_kmer; 
      graph.load_nodes_extremities((char *)(prefix_trashable+left_kmer).c_str());
      first_id_els=graph.construct_graph(prefix_trashable+left_kmer, "LEFT");
      graph.first_id_els=first_id_els;
      //if(!writeHead) graph.print_head(bank->all_extensions[right_kmers[i]].node_id, bank->all_extensions[left_kmers[i]].node_id);
       //newExtensionLeft = true;
      extremKmerLeftId++;
    }
    else first_extending_left_node = bank->all_extensions[left_kmers[i]].node_id;
	
    if(i==0){
	extremKmerRightId=0;
	extremKmerLeftId=1;
        
    }

    /*if(newExtensionRight){
	      	graph.print_sublink(first_id_els.edge, first_id_els.node, first_extending_right_node,  "ff","");
	      	first_id_els.edge++;
	     	 graph.print_sublink(first_id_els.edge, first_extending_right_node, first_id_els.node,  "rr","");
	     	 first_id_els.edge++;
    };
    if(newExtensionLeft){
		graph.print_sublink(first_id_els.edge, first_id_els.node, first_extending_left_node,  "rf",""); //TODO : double check these edges
		 first_id_els.edge++;
		 graph.print_sublink(first_id_els.edge, first_extending_left_node, first_id_els.node,  "rf","");
		first_id_els.edge++;
    };*/

	
    //graph.print_node(first_id_els.node, substarters_sequences[i], ", \"type\": \"real-starter\""); //TODO: need to extract sequence to starter.fa
    char data [200];
    sprintf (data, "\"length\":%d, \"extremGraphRight\":\"k%d\", \"extremGraphLeft\":\"k%d\"",strlen(substarters_sequences[i]), extremKmerRightId,  extremKmerLeftId);
    
    graph.print_substarter(i, substarters_sequences[i], string(data));
//TODO: Meta info for each sub-graph
//TODO: XGMML fomat

    
    //   // PRINT THE SUBSTARTER IN XML FILE
    //   // create a new node containing the substarter
    //   fprintf(graph.graph_file,"<node id=\"%ld\" label=\"%s\">\n</node>\n",  first_node_id, substarters_sequences[i]); 
    //   // link the new node containing the substarter to the first extending left node. We should take care as the first extended kmer was reverse complemented. So links are rf rf
    //   fprintf(graph.graph_file,"<edge source=\"%ld\" target=\"%ld\" label=\"%s\">\n</edge>\n",first_node_id, first_extending_left_node, "rf");
    //   fprintf(graph.graph_file,"<edge source=\"%ld\" target=\"%ld\" label=\"%s\">\n</edge>\n",first_extending_left_node, first_node_id, "rf");
    //   // link the new node containing the substarter to the first extending right node
    //   fprintf(graph.graph_file,"<edge source=\"%ld\" target=\"%ld\" label=\"%s\">\n</edge>\n",first_node_id, first_extending_right_node, "ff");
    //   fprintf(graph.graph_file,"<edge source=\"%ld\" target=\"%ld\" label=\"%s\">\n</edge>\n",first_extending_right_node, first_node_id, "rr");
    //   graph.first_id++; // A new node (substarter) was added

    // create a new node containing the substarter

    
  }
  graph.print_nodes_end();
  graph.print_substarters_end();  
  return first_id_els;
}

void Starter::starter_output_sequence(Extension_Bank *bank, int index, string output_res, bool erase){
    char * rseq;
    char * lseq;
    int readlen;
    FILE * output_res_file = fopen((char *) output_res.c_str(), erase?"w":"a");
    // for each substarter:
    for(int i=0;i<nb_substarters;i++){
        // get the right kmer
        string right_kmer =  bank->all_extensions[right_kmers[i]].starting_kmer;
        cout<<"opening "<<(char*)(prefix_trashable+right_kmer).c_str()<<endl;
        // get the sequence of the right extension
        Bank * Right = new Bank((char*)(prefix_trashable+right_kmer).c_str());
        // ! we should have a unique sequence in this file !
        //        Right->reset_max_readlen(1000000);
        assert(Right->get_next_seq(&rseq,&readlen));
//        printf("RIGHT: I read %s in %s\n", rseq, (char *)(prefix_trashable+right_kmer).c_str(), direction_right.c_str());
        
        // get the left kmer
        string left_kmer =  bank->all_extensions[left_kmers[i]].starting_kmer;
        //string direction_left =   bank->all_extensions[left_kmers[i]].direction;
        // get the sequence of the left extension
        Bank * Left = new Bank((char*)(prefix_trashable+left_kmer).c_str());
        // ! we should have a unique sequence in this file !
        //        Left->reset_max_readlen(1000000);
        assert(Left->get_next_seq(&lseq,&readlen));
//        printf("LEFT: I read %s in %s (direction %s)\n", lseq, (char *)(prefix_trashable+left_kmer).c_str(), direction_left.c_str());
        //    delete Left;
        // We should revese complement the left sequence
        
        revcomp(lseq,readlen);
        lseq[readlen-sizeKmer+1]='\0';
        
        fprintf(output_res_file, ">%s|starter_%d|substarter_%d\n", fragment->fragment_comment, index, i );
        fprintf(output_res_file, "%s%s%s\n", to_lower(lseq), substarters_sequences[i], to_lower(rseq+sizeKmer-1)); // to_lower(rseq+size_seeds-1) removes the overlapping k-1 mer
        Right->close();
        delete Right;
        Left->close();
        delete Left;
    }
  fclose(output_res_file); 
}
