//******************************************************************************
//
// File:    MaxParsSmp.java
// Package: edu.rit.phyl.pars
// Unit:    Class edu.rit.phyl.pars.MaxParsSmp
//
// This Java source file is copyright (C) 2007 by Alan Kaminsky. All rights
// reserved. For further information, contact the author, Alan Kaminsky, at
// ark@cs.rit.edu.
//
// This Java source file is part of the Parallel Java Library ("PJ"). PJ is free
// software; you can redistribute it and/or modify it under the terms of the GNU
// General Public License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// PJ is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
// A PARTICULAR PURPOSE. See the GNU General Public License for more details.
//
// A copy of the GNU General Public License is provided in the file gpl.txt. You
// may also obtain a copy of the GNU General Public License on the World Wide
// Web at http://www.gnu.org/licenses/gpl.html.
//
//******************************************************************************

package edu.rit.phyl.pars;

import edu.rit.pj.Comm;
import edu.rit.pj.ParallelRegion;
import edu.rit.pj.ParallelSection;
import edu.rit.pj.ParallelTeam;

import java.io.File;

import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Class MaxParsSmp is the main program for maximum parsimony phylogenetic tree
 * construction using branch-and-bound search on an SMP parallel computer. The
 * program reads a list of DNA sequences from the given input file in
 * interleaved PHYLIP format; see class {@linkplain DnaSequenceList} for further
 * information. If the argument <I>S</I> is given, the program considers only
 * the first <I>S</I> DNA sequences, otherwise the program considers all the DNA
 * sequences. The program generates rooted bifurcating trees with those DNA
 * sequences, computes each tree's parsimony score (number of state changes)
 * using the Fitch algorithm, and uses the branch-and-bound technique to avoid
 * unnecessary searching. The program prints the best tree or trees -- those
 * with the smallest score -- on the standard output in Newick Standard format.
 * <P>
 * The program runs in multiple parallel threads on an SMP parallel computer.
 * All threads traverse the search tree simultaneously down to a certain depth
 * <I>D</I>. Past level <I>D</I>, separate threads traverse separate branches of
 * the search tree. The value of <I>D</I> is chosen so that traversing the first
 * <I>D</I> levels is fast, but there are enough subproblems below level
 * <I>D</I> to yield good load balancing. If not specified, <I>D</I> = 7 is used
 * by default; there are (2*7-3)!! = 10,395 subproblems in the search tree at
 * level 7. If the number of DNA sequences is less than or equal to <I>D</I>,
 * or if <I>D</I> is less than 1 (so that no level of the search tree could be
 * partitioned among the threads), the program eschews multiple threads and
 * does the entire search in a single thread.
 * <P>
 * Usage: java -Dpj.nt=<I>K</I> edu.rit.phyl.pars.MaxParsSmp <I>infile</I>
 * [ <I>S</I> [ <I>D</I> ] ]
 * <BR><I>K</I> = Number of parallel threads
 * <BR><I>infile</I> = DNA sequence file in interleaved PHYLIP format
 * <BR><I>S</I> = Number of DNA sequences to consider (default: all)
 * <BR><I>D</I> = Level to start parallel searching (default: 7)
 *
 * @author  Alan Kaminsky
 * @version 05-May-2007
 */
public class MaxParsSmp
	{

// Prevent construction.

	private MaxParsSmp()
		{
		}

// Constants.

	// Default parallel search depth.
	static final int DEFAULT_DEPTH = 7;

// Global variables.

	// List of DNA sequences read from the input file.
	static DnaSequenceList sequences;

	// Number of DNA sequences.
	static int S;

	// Parallel search depth.
	static int D;

	// List of DNA sequences with uninformative sites excised.
	static DnaSequenceList excised;

	// Number of state changes due to uninformative sites.
	static int uninformativeStateChanges;

	// Best score found so far, shared by all threads. Only ever lowered, via
	// bestScoreMinimum().
	static AtomicInteger bestScore = new AtomicInteger (Integer.MAX_VALUE);

	// Keeps track which subproblems have been searched in parallel. Holds one
	// more than the largest level-D search node ID claimed so far by any
	// thread; see firstToReachID().
	static AtomicLong searchNodeID = new AtomicLong (0L);

	// Stringifier for printing trees. A node is printed as the name found in
	// the original (unshuffled) sequence list at the index returned by
	// node.sequence().score().
	// NOTE(review): this relies on a tip node's sequence score being that
	// sequence's index in <TT>sequences</TT> -- confirm against DnaSequence
	// and DnaSequenceList.
	static DnaSequenceTree.Stringifier stringifier =
		new DnaSequenceTree.Stringifier()
			{
			public String toString
				(DnaSequenceTree.Node node)
				{
				return sequences.getName (node.sequence().score());
				}
			};

// Main program.

	/**
	 * Main program. Reads the DNA sequences, excises the uninformative sites,
	 * shuffles the sequences into descending distance order, runs the
	 * branch-and-bound search in a parallel team of threads, and prints the
	 * best trees, the running time, and the best score on the standard output.
	 *
	 * @param  args  Command line arguments: <I>infile</I> [ <I>S</I>
	 *               [ <I>D</I> ] ].
	 *
	 * @exception  Exception
	 *     Thrown if an argument is not a valid integer, or if any of the
	 *     operations called by the main program throws an exception.
	 */
	public static void main
		(String[] args)
		throws Exception
		{
		Comm.init (args);

		// Start timing.
		long time = -System.currentTimeMillis();

		// Parse command line arguments. S = 0 means "use all sequences".
		if (args.length < 1 || args.length > 3) usage();
		File infile = new File (args[0]);
		S = args.length < 2 ? 0 : Integer.parseInt (args[1]);
		D = args.length < 3 ? DEFAULT_DEPTH : Integer.parseInt (args[2]);

		// Read DNA sequences from input file, truncate unwanted ones, warn of
		// duplicates.
		System.out.println ("Reading input file ...");
		sequences = DnaSequenceList.read (infile);
		if (S > 0) sequences.truncate (S);
		S = sequences.length();
		System.out.println (S + " sequences");
		for (int s = 0; s < S; ++ s)
			{
			System.out.println ("\t" + sequences.getName (s));
			}
		System.out.println (sequences.getDnaSequence(0).length() + " sites");
		sequences.warnOfDuplicates();

		// Excise uninformative sites, warn of duplicates.
		System.out.println ("Excising uninformative sites ...");
		excised = new DnaSequenceList();
		uninformativeStateChanges =
			sequences.exciseUninformativeSites (excised);
		System.out.println
			(excised.getDnaSequence(0).length() + " informative sites");
		System.out.println
			(uninformativeStateChanges +
			 " state changes from uninformative sites");
		excised.warnOfDuplicates();

		// Shuffle sequences into descending distance order.
		System.out.println ("Shuffling into descending distance order ...");
		excised.shuffleDescendingDistance();
		for (int s = 0; s < S; ++ s)
			{
			System.out.println ("\t" + excised.getName (s));
			}

		// Set stringifier for printing tree tip nodes.
		DnaSequenceTree.defaultStringifier (stringifier);

		// Determine number of parallel threads. Only need multiple threads if
		// number of DNA sequences exceeds parallel search depth. The search is
		// partitioned among the threads only at the level where level+1 == D,
		// and level+1 is always >= 1; so if D < 1 no partitioning would ever
		// happen, and multiple threads would each do the entire search and
		// each print the same best trees. Use a single thread in that case.
		int K = (D >= 1 && S > D) ? ParallelTeam.getDefaultThreadCount() : 1;

		new ParallelTeam(K).execute (new ParallelRegion()
			{
			public void run() throws Exception
				{
				// Allocate tree stack with S levels (per-thread).
				final DnaSequenceTree[] treeStack = new DnaSequenceTree [S];
				for (int s = 0; s < S; ++ s)
					{
					treeStack[s] = new DnaSequenceTree (S);
					}

				// Allocate best tree list (per-thread).
				final DnaSequenceTreeList bestTreeList =
					new DnaSequenceTreeList (S);

				// Generate trees at all levels, starting from the tree that
				// contains only the first sequence, with search node ID 0.
				treeStack[0].addTipNode (0, excised.getDnaSequence (0));
				generateTrees (treeStack, bestTreeList, 0, 0L);

				// Wait for all threads to finish, so the global best score is
				// final before it is used below.
				barrier();

				// Reconcile this thread's best score with global best score.
				bestTreeList.bestScore (bestScore.get());

				// Print best trees, one thread at a time.
				critical (new ParallelSection()
					{
					public void run()
						{
						for (DnaSequenceTree tree : bestTreeList)
							{
							System.out.println (tree);
							}
						}
					});
				}
			});

		// Stop timing.
		time += System.currentTimeMillis();
		System.out.println (time + " msec");

		// Print best score.
		// NOTE(review): uninformativeStateChanges is reported separately above
		// and is not added to the score printed here -- confirm whether the
		// tree scores already account for the uninformative sites.
		System.out.println (bestScore + " state changes in best tree(s)");
		}

// Hidden operations.

	/**
	 * Generate all trees at the given level. The tree at
	 * <TT>treeStack[level]</TT> already contains the first <TT>level+1</TT>
	 * sequences of the excised list. If that is all the sequences, the tree is
	 * offered to the best tree list. Otherwise the next sequence is added at
	 * every node position in turn, and each resulting tree whose score does
	 * not exceed the global best score is searched recursively. At the level
	 * where the tree contains <TT>D</TT> sequences, only the first thread to
	 * reach a given search node ID continues below it; the other threads
	 * backtrack.
	 *
	 * @param  treeStack     Tree stack (one tree per level, per-thread).
	 * @param  bestTreeList  Best tree list (per-thread).
	 * @param  level         Level (index into the tree stack).
	 * @param  id            Search node ID of the tree at this level.
	 */
	private static void generateTrees
		(DnaSequenceTree[] treeStack,
		 DnaSequenceTreeList bestTreeList,
		 int level,
		 long id)
		{
		// Get tree at current level.
		DnaSequenceTree currentTree = treeStack[level];
		int levelPlus1 = level + 1;

		// If all DNA sequences are in the current tree, record it in the best
		// tree list.
		if (levelPlus1 == S)
			{
			// Reconcile this thread's best score with global best score.
			bestTreeList.bestScore (bestScore.get());

			// Add current tree. The tree returned by add() takes the current
			// tree's place on the tree stack.
			int oldBestScore = bestTreeList.bestScore();
			treeStack[level] = bestTreeList.add (currentTree);
			int newBestScore = bestTreeList.bestScore();

			// If this thread's best score changed, update global best score.
			if (oldBestScore != newBestScore)
				{
				bestScoreMinimum (newBestScore);
				}
			}

		// Not all DNA sequences are in the current tree. If we're not at level
		// D, proceed. If we are at level D and this thread is the first to
		// encounter the current search node ID, proceed. Otherwise, backtrack.
		else if (levelPlus1 != D || firstToReachID (id))
			{
			// Generate all possible trees at the next level, unless we've
			// exceeded the best score so far.
			DnaSequence seq = excised.getDnaSequence (levelPlus1);
			int n = currentTree.nodeCount();

			// Child i of this search node gets ID id*n + i.
			id = id * n;
			for (int i = 0; i < n; ++ i)
				{
				DnaSequenceTree nextTree = treeStack[levelPlus1];
				nextTree.copy (currentTree);
				nextTree.updateFitchScore (nextTree.addTipNode (i, seq));
				if (nextTree.score() <= bestScore.get())
					{
					generateTrees (treeStack, bestTreeList, levelPlus1, id+i);
					}
				}
			}
		}

	/**
	 * Set the global best score to the smaller of its current value and the
	 * given value. The update is an atomic compare-and-set, retried until it
	 * succeeds, so concurrent updates from other threads are never lost.
	 *
	 * @param  score  Best score.
	 */
	private static void bestScoreMinimum
		(int score)
		{
		for (;;)
			{
			int oldscore = bestScore.get();
			int newscore = Math.min (oldscore, score);
			if (bestScore.compareAndSet (oldscore, newscore)) return;
			}
		}

	/**
	 * Determine if the calling thread is the first to reach the given search
	 * node ID. If so, <TT>searchNodeID</TT> is set to one greater than
	 * <TT>id</TT> and true is returned. If not, <TT>searchNodeID</TT> is
	 * unchanged and false is returned.
	 *
	 * @param  id  Search node ID.
	 *
	 * @return  True if the calling thread is the first to reach <TT>id</TT>,
	 *          false otherwise.
	 */
	private static boolean firstToReachID
		(long id)
		{
		long newid = id + 1;
		for (;;)
			{
			long oldid = searchNodeID.get();

			// Another thread has already claimed this ID or a later one.
			if (oldid > id) return false;

			// Try to claim this ID; retry if another thread changed
			// searchNodeID in the meantime.
			if (searchNodeID.compareAndSet (oldid, newid)) return true;
			}
		}

	/**
	 * Print a usage message on the standard error and exit with status 1.
	 */
	private static void usage()
		{
		System.err.println ("Usage: java -Dpj.nt=<K> edu.rit.phyl.pars.MaxParsSmp <infile> [<S> [<D>]]");
		System.err.println ("<K> = Number of parallel threads");
		System.err.println ("<infile> = DNA sequence file in interleaved PHYLIP format");
		System.err.println ("<S> = Number of DNA sequences to consider (default: all)");
		System.err.println ("<D> = Level to start parallel searching (default: " + DEFAULT_DEPTH + ")");
		System.exit (1);
		}

	}
