use strict;
use Digest::MD5;

# Build identifier of this script; it is interpolated into the banner below.
my $buildno = '0.1.2013.03.14';

# Print the program banner (script name, build number, web site and the
# GPL notice) to STDERR. This runs unconditionally, before any command line
# argument is examined.
print(STDERR <<"_END");
clmaketsv $buildno
=======================================================================

Official web site of this script is
http://www.fifthdimension.jp/products/claident/ .
To know script details, see above URL.

Copyright (C) 2011-2013  Akifumi S. Tanabe

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

_END

# display usage if command line options were not specified
unless (@ARGV) {
	&helpMessage();
}

# initialize variables
# The last command line argument is always the output file; every argument
# before it is either an option (-p=FILE / -t=FILE style) or an input file
# name or glob pattern.
my $outputfile = $ARGV[-1];
# check output file (an existing file is never overwritten)
if (-e $outputfile) {
	&errorMessage(__LINE__, "Output file already exists.");
}
my @inputfiles;
my $primerfile;
my $tagfile;

{
	# Remembers every input file already accepted so that duplicates can be
	# rejected while @inputfiles keeps the command line order.
	my %inputfiles;
	# An empty slice results when only the output file was given.
	foreach my $argument (@ARGV[0 .. $#ARGV - 1]) {
		if ($argument =~ /^-+(?:primer|primerfile|p)=(.+)$/i) {
			$primerfile = $1;
		}
		elsif ($argument =~ /^-+(?:tag|tagfile|t)=(.+)$/i) {
			$tagfile = $1;
		}
		else {
			my @expanded = glob($argument);
			unless (@expanded) {
				&errorMessage(__LINE__, "Input file does not exist.");
			}
			foreach my $inputfile (@expanded) {
				# glob() hands back a pattern that contains no wildcard
				# unchanged even if no such file exists, so existence has to
				# be verified here. Otherwise the problem would only surface
				# after the output file has already been created.
				if (!-e $inputfile) {
					&errorMessage(__LINE__, "Input file \"$inputfile\" does not exist.");
				}
				if (exists($inputfiles{$inputfile})) {
					&errorMessage(__LINE__, "\"$inputfile\" is doubly specified.");
				}
				$inputfiles{$inputfile} = 1;
				push(@inputfiles, $inputfile);
			}
		}
	}
}
# The optional primer and tag list files must exist when they are specified.
if ($primerfile && !-e $primerfile) {
	&errorMessage(__LINE__, "\"$primerfile\" does not exist.");
}
if ($tagfile && !-e $tagfile) {
	&errorMessage(__LINE__, "\"$tagfile\" does not exist.");
}

# Primer name => primer sequence (upper case, letters only), read from the
# FASTA-like primer list file when one was given on the command line.
my %primer;
if ($primerfile) {
	# Primer names in file order, used only for the listing printed below.
	my @primer;
	my $primerfilehandle;
	# Three-argument open: the file name is never interpreted as a mode.
	unless (open($primerfilehandle, '<', $primerfile)) {
		&errorMessage(__LINE__, "Cannot open \"$primerfile\".");
	}
	# Read one record per iteration: records are separated by a newline
	# followed by ">".
	local $/ = "\n>";
	while (my $record = <$primerfilehandle>) {
		# The pattern below needs a line break after the sequence. The last
		# record of a file that lacks a final newline has none, and would be
		# dropped silently, so supply the missing line break.
		if ($record !~ /\n>?\z/) {
			$record .= "\n";
		}
		if ($record =~ /^>?\s*(\S[^\r\n]*)\r?\n(.+)\r?\n/s) {
			my $name = $1;
			my $primer = uc($2);
			$name =~ s/\s+$//;
			# "__" is the separator between file name parts elsewhere in this
			# script, so it cannot be part of a name.
			if ($name =~ /__/) {
				&errorMessage(__LINE__, "\"$name\" is invalid name. Do not use \"__\" in primer name.");
			}
			# Keep letters only (removes line breaks, spaces, digits, ">").
			$primer =~ s/[^A-Z]//sg;
			if (exists($primer{$name})) {
				&errorMessage(__LINE__, "The primer \"$name\" is multiply specified.");
			}
			else {
				$primer{$name} = $primer;
			}
			push(@primer, $name);
		}
	}
	close($primerfilehandle);
	# Show what was read so that the user can verify it.
	print(STDERR "Primers\n");
	foreach my $name (@primer) {
		print(STDERR "$name: $primer{$name}\n");
	}
}

# Tag name => tag sequence (upper case, letters only), read from the
# FASTA-like tag list file when one was given on the command line.
my %tag;
if ($tagfile) {
	# Tag names in file order, used only for the listing printed below.
	my @tag;
	my $tagfilehandle;
	# Three-argument open: the file name is never interpreted as a mode.
	unless (open($tagfilehandle, '<', $tagfile)) {
		&errorMessage(__LINE__, "Cannot open \"$tagfile\".");
	}
	# Read one record per iteration: records are separated by a newline
	# followed by ">".
	local $/ = "\n>";
	while (my $record = <$tagfilehandle>) {
		# The pattern below needs a line break after the sequence. The last
		# record of a file that lacks a final newline has none, and would be
		# dropped silently, so supply the missing line break.
		if ($record !~ /\n>?\z/) {
			$record .= "\n";
		}
		if ($record =~ /^>?\s*(\S[^\r\n]*)\r?\n(.+)\r?\n/s) {
			my $name = $1;
			my $tag = uc($2);
			$name =~ s/\s+$//;
			# "__" is the separator between file name parts elsewhere in this
			# script, so it cannot be part of a name.
			if ($name =~ /__/) {
				&errorMessage(__LINE__, "\"$name\" is invalid name. Do not use \"__\" in tag name.");
			}
			# Keep letters only (removes line breaks, spaces, digits, ">").
			$tag =~ s/[^A-Z]//sg;
			if (exists($tag{$name})) {
				&errorMessage(__LINE__, "The tag \"$name\" is multiply specified.");
			}
			else {
				$tag{$name} = $tag;
			}
			push(@tag, $name);
		}
	}
	close($tagfilehandle);
	# Show what was read so that the user can verify it.
	print(STDERR "Tags\n");
	foreach my $name (@tag) {
		print(STDERR "$name: $tag{$name}\n");
	}
}
print(STDERR "\n");

# Write the tab-delimited template: a submission/study header section
# followed by one row per input file. Values that cannot be derived from the
# input are written as "<...>" or "[...]" placeholders.
print(STDERR "Generating tab-delimited text...\n");
my $filehandle;
# Three-argument open: the file name is never interpreted as a mode.
unless (open($filehandle, '>', $outputfile)) {
	&errorMessage(__LINE__, "Cannot make \"$outputfile\".");
}
print($filehandle "Submission Title\tCenter Name\tContact E-mail\tContact Name\tHold Until Date\tStudy Title\tStudy Type\tStudy Abstract\tCenter Project Name\n");
print($filehandle "<Submission Title>\t<Center Name e.g. FISHRA>\t<Your E-mail address>\t<Your Name>\tYYYY-MM-DD+00:00\t<Study Title>\tMetagenomics\t<Study Abstract>\t<Center Project Name>\n\n");
print($filehandle "Sample Title\tTaxonomy ID\tScientific Name\tSample Description\tExperiment Title\tExperiment Decription\tLibrary Name\tLibrary Strategy\tLibrary Source\tLibrary Selection\tLibrary Layout\tTargeted Locus Name\tTargeted Locus Description\tPrimer Reference\tPooling Strategy\tLibrary Construction Protocol\tRead Index\tRead Class\tRead Type\tBase Coord\tDefault Length\tExpected Basecall\tRead Index\tRead Class\tRead Type\tBase Coord\tDefault Length\tExpected Basecall\tRead Index\tRead Class\tRead Type\tBase Coord\tDefault Length\tExpected Basecall\tRead Index\tRead Class\tRead Type\tBase Coord\tPlatform\tInstrument Model\tRun Date\tRun Center\tMD5 Checksum\tFile Type\tFile Name\n");
foreach my $inputfile (@inputfiles) {
	# File name without directory, and the same without its last extension.
	my $filename = $inputfile;
	$filename =~ s/^.+\///;
	my $prefix = $filename;
	$prefix =~ s/\.[^\.]+$//;
	# Every column starts out as a placeholder and is replaced below only
	# when the real value can be derived. This also covers file names that
	# carry neither tag nor primer, for which the two position columns were
	# previously left empty.
	my $tag = '<BarCode Sequence>';
	my $taglength = '<BarCode Length>';
	my $primer = '<Primer Sequence>';
	my $primerlength = '<Primer Length>';
	my $primerfirstpos = '<First Position>';
	my $appreadfirstpos = '<First Position>';
	# The prefix is split on "__"; the second and third parts are looked up
	# as tag and/or primer names.
	my @parts = split(/__/, $prefix);
	if (scalar(@parts) == 2) {
		# One extra part: it is a primer name or a tag name, which can only
		# be decided when exactly one of the two lists was given.
		if (%primer && %tag) {
			&errorMessage(__LINE__, "\"$inputfile\" is invalid file name.");
		}
		elsif (%primer && $primer{$parts[1]}) {
			$primer = $primer{$parts[1]};
			$primerlength = length($primer);
		}
		elsif (%tag && $tag{$parts[1]}) {
			$tag = $tag{$parts[1]};
			$taglength = length($tag);
		}
		else {
			&errorMessage(__LINE__, "Although \"$inputfile\" may contain tag or primer, matched tag or primer was not given.");
		}
	}
	elsif (scalar(@parts) == 3) {
		# Two extra parts: tag name followed by primer name.
		if (%primer && %tag && $tag{$parts[1]} && $primer{$parts[2]}) {
			$tag = $tag{$parts[1]};
			$taglength = length($tag);
			$primer = $primer{$parts[2]};
			$primerlength = length($primer);
			# The row below places the 4-base adapter at coordinate 1 and the
			# barcode at coordinate 5, so the primer follows the barcode and
			# the application read follows the primer.
			$primerfirstpos = 5 + $taglength;
			$appreadfirstpos = 5 + $taglength + $primerlength;
		}
		else {
			&errorMessage(__LINE__, "Although \"$inputfile\" may contain tag and primer, matched tag and/or primer were not given.");
		}
	}
	# MD5 checksum of the raw file content.
	my $sfffile;
	unless (open($sfffile, '<', $inputfile)) {
		&errorMessage(__LINE__, "Cannot read \"$inputfile\".");
	}
	binmode($sfffile);
	my $md5 = Digest::MD5->new->addfile($sfffile)->hexdigest;
	close($sfffile);
	print($filehandle "$prefix\t<NCBI Taxonomy ID e.g. 939928>\t<Scientific Name e.g. rhizosphere metagenome>\t<Where were the sample collected from? What is the sample?>\t$prefix\t<How DNA extrected?>\t$prefix\tAMPLICON\tMETAGENOMIC\tPCR\tSINGLE\t[16S rRNA/18S rRNA/RBCL/matK/COX1/ITS1-5.8S-ITS2/exome/other]\t<Locus Description e.g. ITS2 partial sequence>\t<Primer Reference PMID/PMCID/DOI e.g. PMID:22808280, PMCID:PMC3395698, DOI:10.1371/journal.pone.0040863>\tmultiplexed samples\t<How Library constructed?>\t0\tTechnical Read\tAdapter\t1\t4\tTCAG\t1\tTechnical Read\tBarCode\t5\t$taglength\t$tag\t2\tTechnical Read\tPrimer\t$primerfirstpos\t$primerlength\t$primer\t3\tApplication Read\tForward\t$appreadfirstpos\t[LS454/ION_TORRENT]\t[454 GS/454 GS 20/454 GS FLX/454 GS FLX+/454 GS FLX Titanium/454 GS Junior/Ion Torrent PGM/Ion Torrent Proton]\tYYYY-MM-DDT00:00:00+00:00\t<Run Center Name e.g. FISHRA>\t$md5\tsff\t$filename\n");
}
# Buffered write errors (e.g. disk full) only surface when the handle is
# closed, so the result of close has to be checked for an output file.
unless (close($filehandle)) {
	&errorMessage(__LINE__, "Cannot write \"$outputfile\".");
}
print(STDERR "done.\n\n");

# Report a fatal error on STDERR and terminate the script with exit status 1.
#   1st argument: line number to report (callers pass __LINE__)
#   2nd argument: description of the problem
# Never returns.
sub errorMessage {
	my ($where, $text) = @_;
	my $report = "ERROR!: line $where\n$text\n"
		. "If you want to read help message, run this script without options.\n";
	print(STDERR $report);
	exit(1);
}

# Print the usage text to STDERR and terminate the script with exit status 0.
# Takes no arguments and never returns.
sub helpMessage {
	# The text contains nothing to interpolate, hence the single-quoted heredoc.
	my $usage = <<'_USAGE';
Usage
=====
clmaketsv options inputfile1 inputfile2 ... inputfileN outputfile

Command line options
====================
-p, --primerfile=FILENAME
  Specify primer list file name. (default: none)

-t, --tagfile=FILENAME
  Specify tag list file name. (default: none)

Acceptable input file formats
=============================
SFF
_USAGE
	print(STDERR $usage);
	exit(0);
}
