use strict;

# Build identifier shown in the start-up banner.
my $buildno = q{0.1.2013.02.13};

# Every run starts by writing the program name, build identifier and
# license notice to STDERR.
print STDERR <<"_BANNER";
clmakexml $buildno
=======================================================================

Official web site of this script is
http://www.fifthdimension.jp/products/claident/ .
To know script details, see above URL.

Copyright (C) 2011-2013  Akifumi S. Tanabe

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

_BANNER

# With no command line arguments at all, print the usage text instead of
# doing any work (helpMessage terminates the script).
if (!@ARGV) {
	helpMessage();
}

# The last command line argument is the submission ID; it is the prefix of
# every output file name.
my $submissionid = $ARGV[-1];
# Refuse to run when any output file is already present, so that an existing
# submission is never overwritten.  The ".pleaseedit" names are still checked
# as before; the plain submission.xml / study.xml names are checked as well
# because those are the files this script actually opens for writing.
foreach my $suffix ('submission.xml.pleaseedit', 'study.xml.pleaseedit', 'submission.xml', 'study.xml', 'sample.xml', 'experiment.xml', 'run.xml') {
	if (-e "$submissionid.$suffix") {
		errorMessage(__LINE__, "Output file already exists.");
	}
}
# Input files in command line order, with glob patterns expanded.
my @inputfiles;

{
	# Paths collected so far; used to reject a file that is named twice.
	my %seen;
	# Every argument except the last one (the submission ID) names input files.
	foreach my $pattern (@ARGV[0 .. $#ARGV - 1]) {
		my @matched = glob($pattern);
		# A wildcard pattern that matches nothing expands to an empty list.
		unless (@matched) {
			errorMessage(__LINE__, "Input file does not exist.");
		}
		foreach my $path (@matched) {
			# glob() returns a wildcard-free pattern unchanged even when no such
			# file exists, so test explicitly.  Failing here, before any output
			# file has been created, avoids leaving half-written XML behind.
			unless (-e $path) {
				errorMessage(__LINE__, "Input file does not exist.");
			}
			if (exists($seen{$path})) {
				errorMessage(__LINE__, "\"$path\" is doubly specified.");
			}
			$seen{$path} = 1;
			push(@inputfiles, $path);
		}
	}
}
# A lone argument is taken as the submission ID, which leaves nothing to convert.
unless (@inputfiles) {
	errorMessage(__LINE__, "Input file is not specified.");
}

# Write handles for the five XML documents.  They are opened here and stay
# open while the input files are converted.
my $submissionhandle;
my $studyhandle;
my $samplehandle;
my $experimenthandle;
my $runhandle;
# Each entry: reference to the handle variable, document kind used in the
# file name, and the opening tag of the enclosing set element.  The
# submission document has no set element; its single root element is written
# together with its content later.
foreach my $output (
	[\$submissionhandle, 'submission', ''],
	[\$studyhandle, 'study', "<STUDY_SET xmlns:ns2=\"SRA.annotation\">\n"],
	[\$samplehandle, 'sample', "<SAMPLE_SET xmlns:ns2=\"SRA.annotation\">\n"],
	[\$experimenthandle, 'experiment', "<EXPERIMENT_SET xmlns:ns2=\"SRA.annotation\">\n"],
	[\$runhandle, 'run', "<RUN_SET xmlns:ns2=\"SRA.annotation\">\n"],
) {
	my ($handleref, $kind, $rootopen) = @{$output};
	my $path = "$submissionid.$kind.xml";
	# Three-argument open: the file name is used exactly as given, with no
	# whitespace trimming and no interpretation of mode characters.
	unless (open(${$handleref}, '>', $path)) {
		errorMessage(__LINE__, "Cannot make \"$path\".");
	}
	print {${$handleref}} "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" . $rootopen;
}
# Submission- and study-level values.  They are read from line 2 of the first
# input file that is processed and reused for every sample.
my $submissiontitle;
my $centername;
my $contactemail;
my $contactname;
my $holduntildate;
my $studytitle;
my $studytype;
my $studyabstract;
my $centerprojectname;
# Four-digit serial number shared by the sample, experiment and run aliases;
# it keeps counting across input files.
my $sampleno = '0001';
# True once the submission and study documents have been written.  A flag is
# used instead of testing $submissiontitle so that an empty or "0" title
# cannot cause a second SUBMISSION/STUDY to be appended by a later file.
my $headerwritten = 0;
# Make one field safe for use as XML text or as a double-quoted attribute
# value.  An "&" that already starts an entity or character reference is left
# alone so that fields escaped by hand are not escaped twice.
my $escapexml = sub {
	my $text = shift(@_);
	$text =~ s/&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#x[0-9A-Fa-f]+);)/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;
	return $text;
};
foreach my $inputfile (@inputfiles) {
	my $tsvfile;
	unless (open($tsvfile, '<', $inputfile)) {
		errorMessage(__LINE__, "Cannot read \"$inputfile\".");
	}
	# 1-based number of the line currently being processed
	my $lineno = 0;
	while (my $line = <$tsvfile>) {
		$lineno ++;
		# accept LF as well as CRLF line endings
		$line =~ s/\r?\n?$//;
		my @element = split(/\t/, $line);
		# drop one enclosing double quote at each end of every field
		foreach my $field (@element) {
			$field =~ s/^\"//;
			$field =~ s/\"$//;
		}
		# Line 2 carries the submission and study fields.  Lines 1, 3 and 4
		# are not used (presumably column headings -- confirm against clmaketsv).
		if ($lineno == 2 && !$headerwritten) {
			$headerwritten = 1;
			($submissiontitle, $centername, $contactemail, $contactname, $holduntildate, $studytitle, $studytype, $studyabstract, $centerprojectname) = map { $escapexml->($_) } @element;
			print($submissionhandle "<SUBMISSION center_name=\"$centername\" alias=\"$submissionid\_Submission\" xmlns:ns2=\"SRA.annotation\">\n\t<TITLE>$submissiontitle</TITLE>\n\t<CONTACTS>\n\t\t<CONTACT inform_on_error=\"$contactemail\" inform_on_status=\"$contactemail\" name=\"$contactname\"/>\n\t</CONTACTS>\n\t<ACTIONS>\n\t\t<ACTION>\n\t\t\t<ADD schema=\"study\" source=\"$submissionid.study.xml\"/>\n\t\t</ACTION>\n\t\t<ACTION>\n\t\t\t<ADD schema=\"sample\" source=\"$submissionid.sample.xml\"/>\n\t\t</ACTION>\n\t\t<ACTION>\n\t\t\t<ADD schema=\"experiment\" source=\"$submissionid.experiment.xml\"/>\n\t\t</ACTION>\n\t\t<ACTION>\n\t\t\t<ADD schema=\"run\" source=\"$submissionid.run.xml\"/>\n\t\t</ACTION>\n\t\t<ACTION>\n\t\t\t<HOLD HoldUntilDate=\"$holduntildate\"/>\n\t\t</ACTION>\n\t</ACTIONS>\n</SUBMISSION>\n");
			print($studyhandle "\t<STUDY center_name=\"$centername\" alias=\"$submissionid\_Study_0001\">\n\t\t<DESCRIPTOR>\n\t\t\t<STUDY_TITLE>$studytitle</STUDY_TITLE>\n\t\t\t<STUDY_TYPE existing_study_type=\"$studytype\"/>\n\t\t\t<STUDY_ABSTRACT>$studyabstract</STUDY_ABSTRACT>\n\t\t\t<CENTER_PROJECT_NAME>$centerprojectname</CENTER_PROJECT_NAME>\n\t\t</DESCRIPTOR>\n\t</STUDY>\n</STUDY_SET>\n");
		}
		# From line 5 on, every line describes one sample/experiment/run.
		elsif ($lineno > 4) {
			# A blank or tab-only line must not produce an empty record.
			next unless (grep { /\S/ } @element);
			my ($sampletitle, $taxonomyid, $scientificname, $sampledescription, $experimenttitle, $experimentdescription, $libraryname, $librarystrategy, $librarysource, $libraryselection, $librarylayout, $targetedlocusname, $targetedlocusdescription, $primerreference, $poolingstrategy, $libraryconstructionprotocol, $readindex1, $readclass1, $readtype1, $basecoord1, $defaultlength1, $expectedbasecall1, $readindex2, $readclass2, $readtype2, $basecoord2, $defaultlength2, $expectedbasecall2, $readindex3, $readclass3, $readtype3, $basecoord3, $defaultlength3, $expectedbasecall3, $readindex4, $readclass4, $readtype4, $basecoord4, $platform, $instrumentmodel, $rundate, $runcenter, $md5, $filetype, $filename) = map { $escapexml->($_) } @element;
			# The library layout (column 11) and platform (column 39) are written
			# as tag names rather than as text, so they keep their unescaped form.
			($librarylayout, $platform) = @element[10, 38];
			# The primer reference has the form "database:identifier".  Split at
			# the first colon only, because the identifier may contain colons.
			my ($db, $id) = defined($primerreference) ? split(/:/, $primerreference, 2) : ();
			# Map the accepted database prefixes (any letter case) to the names
			# written to the DB element; other prefixes are passed through.
			my %dbname = ('pmid' => 'pubmed', 'pmcid' => 'pubmedcentral', 'doi' => 'doi');
			if (defined($db) && exists($dbname{lc($db)})) {
				$db = $dbname{lc($db)};
			}
			print($samplehandle "\t<SAMPLE center_name=\"$centername\" alias=\"$submissionid\_Sample_$sampleno\">\n\t\t<TITLE>$sampletitle</TITLE>\n\t\t<SAMPLE_NAME>\n\t\t\t<TAXON_ID>$taxonomyid</TAXON_ID>\n\t\t\t<SCIENTIFIC_NAME>$scientificname</SCIENTIFIC_NAME>\n\t\t</SAMPLE_NAME>\n\t\t<DESCRIPTION>$sampledescription</DESCRIPTION>\n\t</SAMPLE>\n");
			print($experimenthandle "\t<EXPERIMENT center_name=\"$centername\" alias=\"$submissionid\_Experiment_$sampleno\">\n\t\t<TITLE>$experimenttitle</TITLE>\n\t\t<STUDY_REF refcenter=\"$centername\" refname=\"$submissionid\_Study_0001\"/>\n\t\t<DESIGN>\n\t\t\t<DESIGN_DESCRIPTION>$experimentdescription</DESIGN_DESCRIPTION>\n\t\t\t<SAMPLE_DESCRIPTOR refcenter=\"$centername\" refname=\"$submissionid\_Sample_$sampleno\"/>\n");
			print($experimenthandle "\t\t\t<LIBRARY_DESCRIPTOR>\n\t\t\t\t<LIBRARY_NAME>$libraryname</LIBRARY_NAME>\n\t\t\t\t<LIBRARY_STRATEGY>$librarystrategy</LIBRARY_STRATEGY>\n\t\t\t\t<LIBRARY_SOURCE>$librarysource</LIBRARY_SOURCE>\n\t\t\t\t<LIBRARY_SELECTION>$libraryselection</LIBRARY_SELECTION>\n\t\t\t\t<LIBRARY_LAYOUT>\n\t\t\t\t\t<$librarylayout/>\n\t\t\t\t</LIBRARY_LAYOUT>\n\t\t\t\t<TARGETED_LOCI>\n\t\t\t\t\t<LOCUS description=\"$targetedlocusdescription\" locus_name=\"$targetedlocusname\">\n\t\t\t\t\t\t<PROBE_SET>\n\t\t\t\t\t\t\t<DB>$db</DB>\n\t\t\t\t\t\t\t<ID>$id</ID>\n\t\t\t\t\t\t</PROBE_SET>\n\t\t\t\t\t</LOCUS>\n\t\t\t\t</TARGETED_LOCI>\n\t\t\t\t<POOLING_STRATEGY>$poolingstrategy</POOLING_STRATEGY>\n\t\t\t\t<LIBRARY_CONSTRUCTION_PROTOCOL>$libraryconstructionprotocol</LIBRARY_CONSTRUCTION_PROTOCOL>\n\t\t\t</LIBRARY_DESCRIPTOR>\n");
			print($experimenthandle "\t\t\t<SPOT_DESCRIPTOR>\n\t\t\t\t<SPOT_DECODE_SPEC>\n\t\t\t\t\t<READ_SPEC>\n\t\t\t\t\t<READ_INDEX>$readindex1</READ_INDEX>\n\t\t\t\t\t<READ_CLASS>$readclass1</READ_CLASS>\n\t\t\t\t\t<READ_TYPE>$readtype1</READ_TYPE>\n\t\t\t\t\t<EXPECTED_BASECALL base_coord=\"$basecoord1\" default_length=\"$defaultlength1\">$expectedbasecall1</EXPECTED_BASECALL>\n\t\t\t\t\t</READ_SPEC>\n\t\t\t\t\t<READ_SPEC>\n\t\t\t\t\t<READ_INDEX>$readindex2</READ_INDEX>\n\t\t\t\t\t<READ_CLASS>$readclass2</READ_CLASS>\n\t\t\t\t\t<READ_TYPE>$readtype2</READ_TYPE>\n\t\t\t\t\t<EXPECTED_BASECALL base_coord=\"$basecoord2\" default_length=\"$defaultlength2\">$expectedbasecall2</EXPECTED_BASECALL>\n\t\t\t\t\t</READ_SPEC>\n\t\t\t\t\t<READ_SPEC>\n\t\t\t\t\t<READ_INDEX>$readindex3</READ_INDEX>\n\t\t\t\t\t<READ_CLASS>$readclass3</READ_CLASS>\n\t\t\t\t\t<READ_TYPE>$readtype3</READ_TYPE>\n\t\t\t\t\t<EXPECTED_BASECALL base_coord=\"$basecoord3\" default_length=\"$defaultlength3\">$expectedbasecall3</EXPECTED_BASECALL>\n\t\t\t\t\t</READ_SPEC>\n\t\t\t\t\t<READ_SPEC>\n\t\t\t\t\t<READ_INDEX>$readindex4</READ_INDEX>\n\t\t\t\t\t<READ_CLASS>$readclass4</READ_CLASS>\n\t\t\t\t\t<READ_TYPE>$readtype4</READ_TYPE>\n\t\t\t\t\t<BASE_COORD>$basecoord4</BASE_COORD>\n\t\t\t\t\t</READ_SPEC>\n\t\t\t\t</SPOT_DECODE_SPEC>\n\t\t\t</SPOT_DESCRIPTOR>\n");
			print($experimenthandle "\t\t</DESIGN>\n\t\t<PLATFORM>\n\t\t\t<$platform>\n\t\t\t\t<INSTRUMENT_MODEL>$instrumentmodel</INSTRUMENT_MODEL>\n\t\t\t\t</$platform>\n\t\t</PLATFORM>\n\t\t<PROCESSING>\n\t\t\t<PIPELINE>\n\t\t\t\t<PIPE_SECTION>\n\t\t\t\t\t<STEP_INDEX>1</STEP_INDEX>\n\t\t\t\t\t<PREV_STEP_INDEX>NIL</PREV_STEP_INDEX>\n\t\t\t\t\t<PROGRAM></PROGRAM>\n\t\t\t\t\t<VERSION></VERSION>\n\t\t\t\t</PIPE_SECTION>\n\t\t\t</PIPELINE>\n\t\t</PROCESSING>\n\t</EXPERIMENT>\n");
			print($runhandle "\t<RUN center_name=\"$centername\" alias=\"$submissionid\_Run_$sampleno\" run_center=\"$runcenter\" run_date=\"$rundate\">\n\t\t<EXPERIMENT_REF refcenter=\"$centername\" refname=\"$submissionid\_Experiment_$sampleno\"/>\n\t\t<DATA_BLOCK>\n\t\t\t<FILES>\n\t\t\t\t<FILE checksum=\"$md5\" checksum_method=\"MD5\" filetype=\"$filetype\" filename=\"$filename\"/>\n\t\t\t</FILES>\n\t\t</DATA_BLOCK>\n\t</RUN>\n");
			# advance the serial number, keeping it zero-padded to four digits
			$sampleno ++;
			$sampleno = sprintf("%04d", $sampleno);
		}
	}
	close($tsvfile);
}
# Finish the three set documents.  The submission and study documents need
# no closing tag here: their closing tags are written together with their
# content when line 2 of the input is processed.
print($samplehandle "</SAMPLE_SET>\n");
print($experimenthandle "</EXPERIMENT_SET>\n");
print($runhandle "</RUN_SET>\n");
# Buffered output is flushed by close(), so a write failure such as a full
# disk may only become visible here; report it instead of leaving a
# truncated file behind unnoticed.
foreach my $output (
	[$submissionhandle, 'submission'],
	[$studyhandle, 'study'],
	[$samplehandle, 'sample'],
	[$experimenthandle, 'experiment'],
	[$runhandle, 'run'],
) {
	my ($handle, $kind) = @{$output};
	unless (close($handle)) {
		errorMessage(__LINE__, "Cannot write \"$submissionid.$kind.xml\".");
	}
}

# Report a fatal error on STDERR and terminate the script with exit status 1.
#   first argument:  line number to report (callers pass __LINE__)
#   second argument: description of the problem
sub errorMessage {
	my ($lineno, $message) = @_;
	print STDERR "ERROR!: line $lineno\n$message\n";
	print STDERR "If you want to read help message, run this script without options.\n";
	exit 1;
}

# Print the usage summary to STDERR and terminate the script with exit
# status 0.  Takes no arguments.
sub helpMessage {
	print STDERR <<"_USAGE";
Usage
=====
clmakexml inputfile1 inputfile2 ... inputfileN submissionID

Acceptable input file formats
=============================
clmaketsv tab-delimited text
_USAGE
	exit(0);
}
