

#include <lpo.h>

/* FORWARD DECLARATION OF THE SCORING FUNCTION.  IT IS DECLARED IN THIS FILE
   SO THAT main() CAN PASS IT AS AN ARGUMENT TO buildup_progressive_lpo() */
extern LPOScore_T caties_scoring_function(int i,int j,
                                          LPOLetter_T seq_x[],
                                          LPOLetter_T seq_y[],
                                          ResidueScoreMatrix_T *m);



/* PROGRAM ENTRY POINT.
   STEPS, IN ORDER:
     1. PRINT USAGE AND exit(-1) IF NO ARGUMENTS WERE GIVEN.
     2. PARSE THE COMMAND LINE FLAGS; THE NON-FLAG ARGUMENT IS THE MATRIX FILE.
     3. READ THE SCORE MATRIX (REQUIRED).
     4. OPTIONALLY READ A PO FILE (-read_po), FILTERED BY -subset / -remove.
     5. OPTIONALLY READ A FASTA FILE (-read_fasta) AND BUILD THE ALIGNMENT,
        EITHER ON TOP OF THE PO FILE, PROGRESSIVELY VIA -pair_score, OR
        ON TOP OF THE FIRST SEQUENCE READ.
     6. OPTIONALLY RUN HEAVIEST BUNDLING (-hb), THEN WRITE THE RESULT IN
        PO (-po), PIR (-pir) AND/OR CLUSTAL (-clustal) FORMAT.
     7. FREE THE SEQUENCE DATA AND exit().
   EXIT STATUS: 0 ON SUCCESS, 1 IF ANY INPUT COULD NOT BE READ OR ANY
   OUTPUT COULD NOT BE WRITTEN, -1 WHEN CALLED WITHOUT ARGUMENTS.
   THIS FUNCTION ALWAYS LEAVES VIA exit(), IT NEVER RETURNS. */
int main(int argc,char *argv[])
{
  /* NOTE(review): SEVERAL OF THE LOCALS BELOW ARE NOT REFERENCED DIRECTLY IN
     THIS FUNCTION.  THEY ARE KEPT BECAUSE THE ARGUMENT / MESSAGE MACROS ARE
     DEFINED ELSEWHERE -- CONFIRM BEFORE REMOVING THEM. */
  int i,ibundle=ALL_BUNDLES,nframe_seq=0,use_reverse_complement=0;
  int nseq=0,do_switch_case=dont_switch_case,do_analyze_bundles=0;
  char score_file[256],seq_file[256],*comment=NULL,*al_name="test align";
  ResidueScoreMatrix_T score_matrix; /* DEFAULT GAP PENALTIES*/
  Sequence_T *seq=NULL,*lpo_out=NULL,*frame_seq=NULL,*dna_lpo=NULL,
  *lpo_in=NULL;
  FILE *seq_ifile=NULL,*errfile=stderr,*logfile=NULL,*lpo_file_out=NULL,
    *subset_ifile=NULL;
  char *print_matrix_letters=NULL,*fasta_out=NULL,*po_out=NULL,*matrix_filename=NULL,
  *seq_filename=NULL,*frame_dna_filename=NULL,*po_filename=NULL,
  *hbmin=NULL,*numeric_data=NULL,*numeric_data_name="Nmiscall",*dna_to_aa=NULL,
    *pair_score_file=NULL,*aafreq_file=NULL,*termval_file=NULL,
    *bold_seq_name=NULL,*subset_file=NULL,*rm_subset_file=NULL;
  float bundling_threshold=0.9;
  int exit_code=0,count_sequence_errors=0,please_print_snps=0,
    report_consensus_seqs=0,report_major_allele=0,use_aggressive_fusion=0;
  int show_allele_evidence=0,please_collapse_lines=0,keep_all_links=0;
  int remove_listed_seqs=0,please_report_similarity=0;
  char *reference_seq_name="CONSENS%d",*clustal_out=NULL;
  int use_global_alignment=0;
  int lpo_out_needs_free=0; /* SET DURING CLEANUP, SEE free_memory_and_exit */


  black_flag_init(argv[0],PROGRAM_VERSION);

  if (argc<2) {
    fprintf(stderr,"\nUsage: %s [OPTIONS] matrixfile\n\n"
"  OPTIONS\t\t\tFUNCTION\n"
"  -------\t\t\t________\n"
"INPUT:\n"
"  -read_fasta FILENAME\t\tReads in FASTA sequence file.\n"
"  -tolower\t\t\tForces FASTA sequences to lowercase\n"
"\t\t\t\t(nucleotides in our matrix files)\n"
"  -toupper\t\t\tForces FASTA sequences to UPPERCASE\n"
"\t\t\t\t(amino acids in our matrix files)\n"
"  -read_po FILENAME\t\tReads in PO file.\n"
"  -subset FILENAME\t\tFilters PO-MSA to include list of seqs in file.\n"
"  -remove FILENAME\t\tFilters PO-MSA to exclude list of seqs in file.\n"
"\n"
"\nALIGNMENT:\n"
"  -pair_score FILENAME\t\tReads tab-delimited file of sequence-sequence\n"
"\t\t\t\tsimilarity scores for constructing a guide-tree\n"
"\t\t\t\tand performing progressive alignment using\n"
"\t\t\t\tPO-PO alignment steps.\n"
"  -fuse_all\t\t\tFuses identical letters on align rings.\n"
"\nANALYSIS:\n"
"  -hb\t\t\t\tPerforms heaviest bundling to generate consensi.\n"
"  -hbmin VALUE\t\t\tBundles into heaviest bundle seqs with percent id >= value.\n"
"\nOUTPUT:\n"
"  -best\t\t\t\tRestricts MSA output to heaviest bundles.\n"
"  -pir FILENAME\t\t\tWrites out MSA in PIR format.\n"
"  -clustal FILENAME\t\tWrites out MSA in CLUSTAL format.\n"
"  -po FILENAME\t\t\tWrites out MSA in PO format.\n"
"  -printmatrix LETTERSET\tPrints score matrix to stdout.\n"
"  -v\t\t\t\tRuns in verbose mode.\n\n"
"  Note:  Either the -read_fasta or -read_po argument must be used with poa,\n"
"         since a FASTA file or PO file must be read in by poa.\n\n"
"For more information, see http://www.bioinformatics.ucla.edu/poa\n\n"
            ,argv[0]);
    exit(-1);
  }

  FOR_ARGS(i,argc) { /* READ ALL THE ARGUMENTS */
    ARGMATCH_VAL("-tolower",do_switch_case,switch_case_to_lower);
    ARGMATCH_VAL("-toupper",do_switch_case,switch_case_to_upper);
    ARGMATCH_VAL("-v",logfile,stdout);
    ARGMATCH_VAL("-best",ibundle,0); /*RESTRICT FASTA OUTPUT TO HB */
    ARGMATCH_VAL("-hb",do_analyze_bundles,1);/*CALCULATE HEAVIEST BUNDLING*/
    ARGGET("-printmatrix",print_matrix_letters);
    ARGGET("-read_po",po_filename); /* READ A PO FILE FOR ALIGNMENT/ANALYSIS*/
    ARGGET("-pir",fasta_out); /* SAVE FASTA-PIR FORMAT ALIGNMENT FILE */
    ARGGET("-clustal",clustal_out); /* SAVE CLUSTAL FORMAT ALIGNMENT FILE */
    ARGGET("-po",po_out); /* SAVE PO FORMAT ALIGNMENT FILE */
    ARGGET("-hbmin",hbmin); /* SET THRESHOLD FOR BUNDLING */
    ARGMATCH("-fuse_all",use_aggressive_fusion);
    ARGMATCH("-global",use_global_alignment);
    ARGGET("-pair_score",pair_score_file); /* FILENAME TO READ PAIR SCORES*/
    ARGGET("-subset",subset_file); /* FILENAME TO READ SEQ SUBSET LIST*/
    ARGGET("-remove",rm_subset_file); /* FILENAME TO READ SEQ REMOVAL LIST*/
    ARGGET("-read_fasta",seq_filename); /* READ FASTA FILE FOR ALIGNMENT */
    NEXTARG(matrix_filename); /* NON-FLAG ARG SHOULD BE MATRIX FILE */

  }

  if (rm_subset_file) { /* TREAT SUBSET FILE AS LIST OF SEQS TO REMOVE */
    subset_file=rm_subset_file; /* -remove OVERRIDES ANY -subset FILE */
    remove_listed_seqs=1;
  }

  if (hbmin)
    bundling_threshold=atof(hbmin);

  if (!matrix_filename ||
      read_score_matrix(matrix_filename,&score_matrix)<=0){/* READ MATRIX */
    WARN_MSG(USERR,(ERRTXT,"Error reading matrix file %s.  Exiting",
                    matrix_filename? matrix_filename: "(null)"),"$Revision: 1.2 $");
    exit_code=1; /* SIGNAL ERROR CONDITION */
    goto free_memory_and_exit;
  }

  if (logfile) { /* VERBOSE MODE: REPORT THE GAP PENALTIES FROM THE MATRIX */
    fprintf(logfile, "X-Gap Penalties: %d %d %d\n",
            score_matrix.gap_penalty_set[0][0],
            score_matrix.gap_penalty_set[0][1],
            score_matrix.gap_penalty_set[0][2]);
    fprintf(logfile, "Y-Gap Penalties: %d %d %d\n",
            score_matrix.gap_penalty_set[1][0],
            score_matrix.gap_penalty_set[1][1],
            score_matrix.gap_penalty_set[1][2]);
  }

  if (print_matrix_letters) /* USER WANTS US TO PRINT A MATRIX */
    print_score_matrix(stdout,&score_matrix,print_matrix_letters
                       /*"ARNDCQEGHILKMFPSTWYV"*/);

  if (po_filename) { /* READ PARTIAL ORDER FILE */
    seq_ifile=fopen(po_filename,"r");
    if (seq_ifile) {
      if (subset_file) { /* LIST OF SEQS TO FILTER THE PO WITH */
        subset_ifile=fopen(subset_file,"r");
        if (!subset_ifile) {
          fclose(seq_ifile); /* DON'T LEAK THE OPEN PO FILE ON THIS ERROR PATH */
          seq_ifile=NULL;
          WARN_MSG(USERR,(ERRTXT,"Error reading subset file %s.  Exiting",
                          subset_file),"$Revision: 1.2 $");
          exit_code=1; /* SIGNAL ERROR CONDITION */
          goto free_memory_and_exit;
        }
        lpo_in=read_lpo_select(seq_ifile,subset_ifile,keep_all_links,
                               remove_listed_seqs);
        fclose(subset_ifile);
      }
      else
        lpo_in=read_lpo(seq_ifile); /* READ LPO NORMALLY W/O FILTERING */
      fclose(seq_ifile);
    }
    if (!lpo_in) { /* EITHER THE OPEN OR THE READ FAILED */
      WARN_MSG(USERR,(ERRTXT,"Error reading PO file %s!!!\nExiting.",
                      po_filename),"$Revision: 1.2 $");
      exit_code=1; /* SIGNAL ERROR CONDITION */
      goto free_memory_and_exit;
    }
  }

  if (seq_filename) { /* READ SEQUENCES TO ALIGN */
    seq_ifile=fopen(seq_filename,"r"); /* READ SEQUENCE DATABASE */
    if (seq_ifile) {
      nseq=read_fasta(seq_ifile,&seq,do_switch_case,&comment);
      fclose(seq_ifile);
    }
  }

  if (nseq>0) { /* INITIALIZE THE SEQUENCES AND RUN THE ALIGNMENT */
    fprintf(errfile,"...Read %d sequences from %s...\n",nseq,seq_filename);
    /*initialize_seqs_as_lpo(nseq,seq,&score_matrix); */
    if (lpo_in) { /* ADD TO OUR EXISTING ALIGNMENT */
      lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix,
                          use_aggressive_fusion,
                          use_global_alignment);/*BUILD ALIGNMENT*/
      lpo_in=NULL; /* DATA STRUCTURE NOW POINTED TO BY lpo_out,
                      SO DON'T FREE IT TWICE!! */
    }
    else if (pair_score_file) {
      lpo_out=buildup_progressive_lpo(nseq,seq,&score_matrix,
                                      use_aggressive_fusion,
                                      pair_score_file,
                                      caties_scoring_function,
                                      use_global_alignment);
    }
    else /* OTHERWISE JUST BUILDUP ON TOP OF FIRST READ SEQUENCE */
      lpo_out=buildup_lpo(seq,nseq-1,seq+1,&score_matrix,
                          use_aggressive_fusion,
                          use_global_alignment);
  }
  else if (lpo_in) /* FILTERED LPO... CAN PRINT OUT AS LPO */
    lpo_out=lpo_in;
  else { /* HMM... NO DATA TO WORK WITH AT ALL.  MUST BE AN ERROR. COMPLAIN!*/
    WARN_MSG(USERR,(ERRTXT,"Error reading sequence file %s and PO file %s. Exiting.",
            seq_filename? seq_filename: "because none specified",
            po_filename? po_filename:  "because none specified"),"$Revision: 1.2 $");
    exit_code=1; /* SIGNAL ERROR CONDITION */
    goto free_memory_and_exit;
  }

  if (comment) { /* SAVE THE COMMENT LINE AS TITLE OF OUR LPO */
    FREE(lpo_out->title);
    lpo_out->title=strdup(comment);
  }

  if (do_analyze_bundles) /* DIVIDE INTO BUNDLES W/ CONSENSUS */
    generate_lpo_bundles(lpo_out,bundling_threshold);

  /* NOTE: A FAILURE TO OPEN ONE OUTPUT FILE SETS exit_code BUT DOES NOT STOP
     THE REMAINING OUTPUT FILES FROM BEING WRITTEN. */
  if (po_out) { /* WRITE FINAL PARTIAL ORDER ALIGNMENT TO OUTPUT */
    lpo_file_out=fopen(po_out, "w");
    if (lpo_file_out) {
       write_lpo(lpo_file_out,lpo_out,&score_matrix);
       fclose(lpo_file_out);
    }
    else {
      WARN_MSG(USERR,(ERRTXT,"*** Could not save PO file %s.  Exiting.",
              po_out),"$Revision: 1.2 $");
      exit_code=1; /* SIGNAL ERROR CONDITION */
    }
  }

  if (fasta_out) { /* WRITE FINAL ALIGNMENT IN FASTA FORMAT */
    seq_ifile=fopen(fasta_out,"w"); /* FASTA ALIGNMENT*/
    if (seq_ifile) {
      write_lpo_bundle_as_fasta(seq_ifile,lpo_out,score_matrix.nsymbol,
                                score_matrix.symbol,ibundle);
      fclose(seq_ifile);
    }
    else {
      WARN_MSG(USERR,(ERRTXT,"*** Could not save FASTA file %s.  Exiting.",
              fasta_out),"$Revision: 1.2 $");
      exit_code=1; /* SIGNAL ERROR CONDITION */
   }
  }

  if (clustal_out) { /* WRITE FINAL ALIGNMENT IN CLUSTAL FORMAT */
    seq_ifile=fopen(clustal_out,"w"); /* CLUSTAL ALIGNMENT */
    if (seq_ifile) {
      export_clustal_seqal(seq_ifile,lpo_out,score_matrix.nsymbol,
                           score_matrix.symbol);
      fclose(seq_ifile);
    }
    else {
      /* REPORT THE CLUSTAL FILE NAME (fasta_out MAY BE NULL OR UNRELATED) */
      WARN_MSG(USERR,(ERRTXT,"*** Could not save CLUSTAL file %s.  Exiting.",
              clustal_out),"$Revision: 1.2 $");
      exit_code=1; /* SIGNAL ERROR CONDITION */
   }
  }



 free_memory_and_exit: /* FREE ALL DYNAMICALLY ALLOCATED DATA!!!! */
  /* lpo_out MAY ALIAS seq OR lpo_in.  DECIDE WHETHER IT NEEDS ITS OWN FREE
     BEFORE ANYTHING IS RELEASED, SO WE NEVER COMPARE AGAINST A POINTER
     THAT HAS ALREADY BEEN FREED. */
  lpo_out_needs_free=(lpo_out!=NULL && lpo_out!=seq && lpo_out!=lpo_in);
  if (dna_lpo)
    free_lpo_sequence(dna_lpo,TRUE);
  if (lpo_in)
    free_lpo_sequence(lpo_in,TRUE);
  if (lpo_out_needs_free)
    free_lpo_sequence(lpo_out,TRUE);
  LOOPB (i,nseq) /* seq[] WAS ALLOCATED AS ONE ARRAY, SO FREE seq[0] LAST */
    free_lpo_sequence(seq+i,i==0); /* FREE BLOCK AFTER ALL HOLDERS EMPTY*/
  exit(exit_code);
}

