use strict;
use File::Spec;

# Build identifier interpolated into the startup banner below.
my $buildno = '0.1.2014.02.06';

# Startup banner (script name, build number and licence notice), sent to
# STDERR so that it never mixes with anything written to STDOUT.
print STDERR <<"_END";
clsplitsff $buildno
=======================================================================

Official web site of this script is
http://www.fifthdimension.jp/products/claident/ .
To know script details, see above URL.

Copyright (C) 2011-2014  Akifumi S. Tanabe

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

_END

# With no command line arguments at all, show the usage text
# (helpMessage terminates the script).
if (!@ARGV) {
	helpMessage();
}

# initialize variables
my $devnull = File::Spec->devnull();	# null device; sfffile output is redirected here
my $numthreads = 1;	# maximum number of child processes running at once
my $outputfolder = $ARGV[-1];	# the last argument is always the output folder
my $inputfiles;	# input SFF files as one string, each name preceded by a space
my @fastafiles;	# FASTA files in the order they were specified
my $runname;	# optional replacement for the run name part of output names
my $nodel;	# true: keep the temporary ID list files
my $append;	# true: an already existing output folder is accepted

# Parse every argument except the last one (the output folder).
# Arguments that match no option are treated as input file patterns.
{
	my %fastafiles;	# FASTA files seen so far, for duplicate detection
	my %inputfiles;	# input files seen so far, for duplicate detection
	for (my $i = 0; $i < scalar(@ARGV) - 1; $i ++) {
		if ($ARGV[$i] =~ /^-+fasta(?:file)?=(.+)$/i) {
			# The usage text documents --fastafile=FILENAME(,FILENAME,...).
			# glob() does not split on commas, so split the value here and
			# expand wild cards in every element separately.
			my $filelist = $1;
			foreach my $pattern (grep { length($_) } split(/,/, $filelist)) {
				my @temp = glob($pattern);
				if (scalar(@temp) > 0) {
					foreach (@temp) {
						if (!exists($fastafiles{$_})) {
							$fastafiles{$_} = 1;
							push(@fastafiles, $_);
						}
						else {
							&errorMessage(__LINE__, "\"$_\" is doubly specified.");
						}
					}
				}
				else {
					&errorMessage(__LINE__, "FASTA file does not exist.");
				}
			}
		}
		elsif ($ARGV[$i] =~ /^-+runname=(.+)$/i) {
			$runname = $1;
		}
		elsif ($ARGV[$i] =~ /^-+(?:n|n(?:um)?threads?)=(\d+)$/i) {
			$numthreads = $1;
		}
		elsif ($ARGV[$i] =~ /^-+nodel$/i) {
			$nodel = 1;
		}
		elsif ($ARGV[$i] =~ /^-+(?:a|append)$/i) {
			$append = 1;
		}
		else {
			# not an option: expand as an input file pattern
			my @temp = glob($ARGV[$i]);
			if (scalar(@temp) > 0) {
				foreach (@temp) {
					if (!exists($inputfiles{$_})) {
						$inputfiles{$_} = 1;
						# the leading space lets the string be appended
						# directly to the sfffile command line later on
						$inputfiles .= " $_";
					}
					else {
						&errorMessage(__LINE__, "\"$_\" is doubly specified.");
					}
				}
			}
			else {
				&errorMessage(__LINE__, "Input file does not exist.");
			}
		}
	}
}
# validation
# An output folder that already exists is acceptable only in append mode.
if (!$append && -e $outputfolder) {
	errorMessage(__LINE__, "\"$outputfolder\" already exists.");
}
# At least one input file and one FASTA file are required.
unless ($inputfiles) {
	errorMessage(__LINE__, "Input file was not specified.");
}
unless (@fastafiles) {
	errorMessage(__LINE__, "FASTA file was not specified.");
}

# make output folder
# (nothing to do when the path already exists)
unless (-e $outputfolder) {
	unless (mkdir($outputfolder)) {
		errorMessage(__LINE__, "Cannot make output folder.");
	}
}

# extract subset sff from sff files
# One child process is forked per FASTA file. Each child writes the sequence
# names found in its FASTA file (the part of every ">" line before the first
# "__") to "$outputfolder/<prefix>.txt", runs the external "sfffile" command to
# build "$outputfolder/<prefix>.sff" from the input files, and exits.
# The parent waits for a child whenever $numthreads children have been started
# and aborts as soon as a reaped child reports a non-zero status.
print(STDERR "Extracting SFF from SFF...\n");
{
	my $child = 0;	# number of children counted as running
	$| = 1;
	$? = 0;
	for (my $i = 0; $i < scalar(@fastafiles); $i ++) {
		my $pid = fork();
		# fork() returns undef on failure. Without this check the parent
		# would take the child branch, process one file itself and then
		# exit, silently skipping all remaining FASTA files.
		if (!defined($pid)) {
			&errorMessage(__LINE__, "Cannot fork a child process.");
		}
		if ($pid) {
			# parent
			$child ++;
			if ($child >= $numthreads) {
				# limit reached: block until one child terminates
				if (wait == -1) {
					# no child left at all
					$child = 0;
				} else {
					$child --;
				}
			}
			if ($?) {
				&errorMessage(__LINE__, "The processes did not finished correctly.");
			}
			next;
		}
		else {
			# child
			# output name: FASTA file name without directory and extension
			my $prefix = $fastafiles[$i];
			$prefix =~ s/^.+\///;
			$prefix =~ s/\.[^\.]+$//;
			print(STDERR "$prefix...\n");
			if ($runname) {
				# replace everything up to the first "__" by the run name
				$prefix =~ s/^.+?__/$runname\__/;
			}
			if (-e "$outputfolder/$prefix.sff") {
				&errorMessage(__LINE__, "\"$outputfolder/$prefix.sff\" already exists.");
			}
			if (-e "$outputfolder/$prefix.txt") {
				&errorMessage(__LINE__, "\"$outputfolder/$prefix.txt\" already exists.");
			}
			# Three-argument open: the file name can never be taken for a
			# mode or a pipe, and surrounding whitespace is preserved.
			my $inputhandle;
			unless (open($inputhandle, '<', $fastafiles[$i])) {
				&errorMessage(__LINE__, "Cannot read \"$fastafiles[$i]\".");
			}
			my $outputhandle;
			unless (open($outputhandle, '>', "$outputfolder/$prefix.txt")) {
				&errorMessage(__LINE__, "Cannot write \"$outputfolder/$prefix.txt\".");
			}
			while (<$inputhandle>) {
				if (/^>\s*(.+?)__/) {
					print($outputhandle "$1\n");
				}
			}
			# Buffered write errors only surface at close(); a truncated
			# ID list must not be handed to sfffile.
			unless (close($outputhandle)) {
				&errorMessage(__LINE__, "Cannot write \"$outputfolder/$prefix.txt\".");
			}
			close($inputhandle);
			# $inputfiles already starts with a space.
			# NOTE: the command goes through the shell and the paths are
			# interpolated unquoted, so names containing whitespace or shell
			# metacharacters are not supported here.
			if (system("sfffile -o $outputfolder/$prefix.sff -i $outputfolder/$prefix.txt$inputfiles 2> $devnull 1> $devnull")) {
				&errorMessage(__LINE__, "Cannot run \"sfffile -o $outputfolder/$prefix.sff -i $outputfolder/$prefix.txt$inputfiles\".");
			}
			unless ($nodel) {
				unlink("$outputfolder/$prefix.txt");
			}
			# the child must never continue the parent's loop
			exit;
		}
	}
	# join
	# reap all remaining children and check their exit status
	while (wait != -1) {
		if ($?) {
			&errorMessage(__LINE__, "The processes did not finished correctly.");
		}
	}
}
print(STDERR "done.\n\n");

# Report a fatal error on STDERR and terminate the process with status 1.
#   first argument : line number to report (callers pass __LINE__)
#   second argument: description of the problem
# Never returns.
sub errorMessage {
	my ($lineno, $message) = @_;
	print STDERR "ERROR!: line $lineno\n$message\n"
		. "If you want to read help message, run this script without options.\n";
	exit(1);
}

# Print the usage text to STDERR and terminate the process with status 0.
# Takes no arguments and never returns.
sub helpMessage {
	print(STDERR <<"_END");
Usage
=====
clsplitsff options inputfile1 inputfile2 ... inputfileN outputfolder

Command line options
====================
--fastafile=FILENAME(,FILENAME,...)
  Specify FASTA file names. Wild cards can be used. (default: none)

--runname=RUNNAME
  Specify run name for replacing run name.
(default: FASTA file name without extension)

-n, --numthreads=INTEGER
  Specify the number of processes. (default: 1)

-a, --append
  If this option is specified, an already existing output folder is
  accepted instead of being reported as an error. (default: off)

--nodel
  If this option is specified, all temporary files will not be deleted.

Acceptable input file formats
=============================
SFF
_END
	exit;
}
