use strict;
use File::Spec;

my $buildno = '0.1.2014.02.06';

print(STDERR <<"_END");
clcleanseq $buildno
=======================================================================

Official web site of this script is
http://www.fifthdimension.jp/products/claident/ .
To know script details, see above URL.

Copyright (C) 2011-2014  Akifumi S. Tanabe

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

_END

# Show the usage text when the script is started without any argument.
if (!@ARGV) {
	&helpMessage();
}

# Default settings. Most of them can be overridden by the command line
# options parsed further below.
# Null device used to discard the output of external commands.
my $devnull = File::Spec->devnull();
# Relative position in the sorted cluster sizes that is used when the minimum
# clean cluster size has to be determined automatically.
my $pnoisycluster = 0.8;
# Primary clusters smaller than this are reported as noisy
# (0 requests automatic determination).
my $mincleanclustersize = 5;
# Number of threads given to assams and used to limit the forked children.
my $numthreads = 1;
# Extra option strings handed to assams and uchime.
my ($assamsoption, $uchimeoption);
# Processing switches: noise detection, chimera detection, false positive
# check of the chimera detection, and FASTA output.
my ($denoise, $uchime, $falsepositive) = (1, 1, 1);
# Number of UCHIME-positive members a merged cluster needs before the whole
# cluster is treated as chimeric.
my $minnpositive = 1;
my $savefasta = 1;
# The last argument is always taken as the output folder.
my $outputfolder = $ARGV[-1];
my @inputfiles;
# Never write into an existing file or folder.
&errorMessage(__LINE__, "\"$outputfolder\" already exists.") if (-e $outputfolder);

# get command line options
# Every argument except the last one (the output folder) is examined here.
# Words between "assams" or "uchime" and "end" are collected verbatim as
# options for the respective program, "-name=value" arguments set this
# script's own options, and anything else is expanded with glob() and taken
# as input file names.
{
	my $assamsmode = 0;
	my $uchimemode = 0;
	my %inputfiles;
	# Converts an enable/disable style value to 1 or 0. The option patterns
	# below only let valid values through; the error is a safety net.
	my $parseswitch = sub {
		my ($value, $argument) = @_;
		if ($value =~ /^(?:enable|yes|true|E|Y|T)$/i) {
			return 1;
		}
		elsif ($value =~ /^(?:disable|no|false|D|N|F)$/i) {
			return 0;
		}
		&errorMessage(__LINE__, "\"$argument\" is invalid option.");
	};
	for (my $i = 0; $i < scalar(@ARGV) - 1; $i ++) {
		my $argument = $ARGV[$i];
		if ($argument eq 'end') {
			# "end" closes a pass-through section
			$assamsmode = 0;
			$uchimemode = 0;
		}
		elsif ($assamsmode) {
			$assamsoption .= " $argument";
		}
		elsif ($uchimemode) {
			$uchimeoption .= " $argument";
		}
		elsif ($argument eq 'assams') {
			$assamsmode = 1;
		}
		elsif ($argument eq 'uchime') {
			$uchimemode = 1;
		}
		elsif ($argument =~ /^-+(?:check|detect)f(?:alse)?p(?:ositive)?=(enable|disable|yes|no|true|false|E|D|Y|N|T|F)$/i) {
			# "$1" is copied because the helper runs its own pattern matches
			$falsepositive = $parseswitch->("$1", $argument);
		}
		elsif ($argument =~ /^-+(?:save|output)fasta=(enable|disable|yes|no|true|false|E|D|Y|N|T|F)$/i) {
			$savefasta = $parseswitch->("$1", $argument);
		}
		elsif ($argument =~ /^-+(?:detect|clean)(?:mode)?=(n|noise|c|chimera|n\+c|c\+n)$/i) {
			# n: noise only, c: chimera only, n+c / c+n: both
			my $mode = lc($1);
			$denoise = ($mode eq 'c' || $mode eq 'chimera') ? 0 : 1;
			$uchime = ($mode eq 'n' || $mode eq 'noise') ? 0 : 1;
		}
		# Anchored at the end like all the other options, so that values with
		# trailing garbage such as "0.8x" or "0.5.3" are no longer accepted.
		elsif ($argument =~ /^-+(?:r|rate|p|percentage)noisycluster=(\d\.\d+)$/i) {
			$pnoisycluster = $1;
		}
		elsif ($argument =~ /^-+min(?:imum)?cleanclustersize=(\d+)$/i) {
			$mincleanclustersize = $1;
		}
		elsif ($argument =~ /^-+min(?:imum)?n(?:um)?positive=(\d+)$/i) {
			$minnpositive = $1;
		}
		elsif ($argument =~ /^-+(?:n|n(?:um)?threads?)=(\d+)$/i) {
			$numthreads = $1;
		}
		else {
			# Anything else is an input file name or a wildcard pattern.
			my @temp = glob($argument);
			if (scalar(@temp) > 0) {
				foreach my $file (@temp) {
					if (!exists($inputfiles{$file})) {
						$inputfiles{$file} = 1;
						push(@inputfiles, $file);
					}
					else {
						&errorMessage(__LINE__, "\"$file\" is doubly specified.");
					}
				}
			}
			else {
				&errorMessage(__LINE__, "Input file does not exist.");
			}
		}
	}
}

# The false positive check works on the result of chimera detection, so it
# cannot be combined with a run in which chimera detection is switched off.
if (!$uchime && $falsepositive) {
	&errorMessage(__LINE__, "False positive check for chimera detection requires chimera detection.");
}

# The thread number is always given to assams by this script itself.
if ($assamsoption =~ /-+(?:n|n(?:um)?threads?)=\d+/i) {
	&errorMessage(__LINE__, "The option for assams is invalid.");
}
# Append the default value of every assams option the user did not specify.
# The entries are processed in this order: [pattern of the option, default].
foreach my $default (
	[qr/-+min(?:imum)?n(?:um)?seqs?=\d+/i, ' --minnseq=1000'],
	[qr/-+(?:min(?:imum)?ident(?:ity|ities)?|m)=\d+/i, ' --minident=0.995'],
	[qr/-+min(?:imum)?(?:overlap|ovl)(?:length|len)=\d+/i, ' --minovllen=100'],
	[qr/-+max(?:imum)?n(?:um)?hits?=\d+/i, ' --maxnhits=0'],
	[qr/-+(?:strand|s)=(?:forward|plus|single|both|double)/i, ' --strand=plus'],
	[qr/-+assemble(?:mode)?=(?:exact|ungapped|ungap|gapped|gap|e\+u\+g|e\+u|e\+g|u\+g|all|e|u|g|a)/i, ' --assemblemode=u+g'],
	[qr/-+merge(?:mode)?=(rough|lazy|draft|rapid|fast|middle|moderate|normal|accurate|slow|strict)/i, ' --mergemode=normal'],
	[qr/-+(?:(?:aln|align|alignment)wiggle|a)=\d+/i, ' --alnwiggle=11'],
) {
	my ($pattern, $fallback) = @{$default};
	if ($assamsoption !~ $pattern) {
		$assamsoption .= $fallback;
	}
}

# Input and output of uchime are set by this script, so they must not be
# given by the user.
foreach my $reserved (qr/\-+input/, qr/\-+uchimeout/) {
	if ($uchimeoption =~ $reserved) {
		&errorMessage(__LINE__, "The option for uchime is invalid.");
	}
}
# Defaults of the uchime options the user did not specify.
foreach my $default (
	[qr/\-\-minh \d+/, ' --minh 0.1'],
	[qr/\-\-mindiv \d+/, ' --mindiv 0.8'],
) {
	my ($pattern, $fallback) = @{$default};
	if ($uchimeoption !~ $pattern) {
		$uchimeoption .= $fallback;
	}
}

unless (@inputfiles) {
	&errorMessage(__LINE__, "No input file was specified.");
}

# The false positive check is switched off when there is only one input file.
if (scalar(@inputfiles) == 1) {
	$falsepositive = 0;
	print(STDERR "WARNING: The number of inputfile is 1. Detection of false positives in chimera detection is disabled.\n\n");
}

# Report the option strings that will actually be used.
print(STDERR "Command line options for assams for noise detection:$assamsoption\n");
print(STDERR "Command line options for uchime for chimera detection:$uchimeoption\n");
print(STDERR "\n");

# Command names of the external bank utilities used in the later steps.
# They are taken from "<ASSAMSHOME>/bin" if ASSAMSHOME is known, otherwise
# the bare command names are used and resolved through the search path.
my $bank2fasta;
my $listReadPlacedStatus;
my $dumpreads;
my $bankreport;
my $banktransact;
{
	my $pathto;
	if ($ENV{'ASSAMSHOME'}) {
		# the environment variable has priority over the setting files
		$pathto = $ENV{'ASSAMSHOME'};
	}
	else {
		# Use the first existing setting file: current directory, home
		# directory, then the system-wide one.
		my $temp;
		foreach my $candidate ('.assams', $ENV{'HOME'} . '/.assams', '/etc/assams/.assams') {
			if (-e $candidate) {
				$temp = $candidate;
				last;
			}
		}
		if ($temp) {
			my $filehandle;
			# Three-argument open: the path (which may come from $ENV{HOME})
			# is never interpreted as an open mode.
			unless (open($filehandle, '<', $temp)) {
				&errorMessage(__LINE__, "Cannot read \"$temp\".");
			}
			while (my $line = <$filehandle>) {
				# only the first "ASSAMSHOME = value" line is used
				if ($line =~ /^\s*ASSAMSHOME\s*=\s*(\S[^\r\n]*)/) {
					$pathto = $1;
					$pathto =~ s/\s+$//;
					last;
				}
			}
			close($filehandle);
		}
	}
	if ($pathto) {
		# strip surrounding double quotes and one trailing slash
		$pathto =~ s/^"(.+)"$/$1/;
		$pathto =~ s/\/$//;
		$pathto .= '/bin';
		if (!-e $pathto) {
			&errorMessage(__LINE__, "Cannot find \"$pathto\".");
		}
		# The commands are quoted because they are interpolated into shell
		# command lines.
		$bank2fasta = "\"$pathto/bank2fasta\"";
		$listReadPlacedStatus = "\"$pathto/listReadPlacedStatus\"";
		$dumpreads = "\"$pathto/dumpreads\"";
		$bankreport = "\"$pathto/bank-report\"";
		$banktransact = "\"$pathto/bank-transact\"";
	}
	else {
		$bank2fasta = 'bank2fasta';
		$listReadPlacedStatus = 'listReadPlacedStatus';
		$dumpreads = 'dumpreads';
		$bankreport = 'bank-report';
		$banktransact = 'bank-transact';
	}
}

# Create the output folder; its absence was checked at startup.
print(STDERR "Making output folder...\n");
unless (mkdir($outputfolder)) {
	&errorMessage(__LINE__, 'Cannot make output directory.');
}
print(STDERR "done.\n\n");

# Run the first assams for every input file, one file after another. Each
# run is given --numthreads=$numthreads and writes "<name>.preuchime.bnk"
# into the output folder.
my $exactoption = $assamsoption;
print(STDERR "Running assembly based on exact overlap by assams at each file...\n");
{
	# This run always uses identity 1, exact assembly and rough merging,
	# whatever the assams options for noise detection are.
	$exactoption =~ s/-+(?:min(?:imum)?ident(?:ity|ities)?|m)=[0-9\.]+/--minident=1/i;
	$exactoption =~ s/-+assemble(?:mode)?=(?:exact|ungapped|ungap|gapped|gap|e\+u\+g|e\+u|e\+g|u\+g|all|e|u|g|a)/--assemblemode=e/i;
	$exactoption =~ s/-+merge(?:mode)?=(rough|lazy|draft|rapid|fast|middle|moderate|normal|accurate|slow|strict)/--mergemode=rough/i;
	my @banknames;
	for my $inputfile (@inputfiles) {
		print(STDERR "Processing $inputfile...\n");
		# The bank is named after the input file without its directory and
		# without its last extension.
		(my $filename = $inputfile) =~ s/^.+(?:\/|\\)//;
		$filename =~ s/\.[^\.]+$//;
		push(@banknames, $filename);
		my $command = "assams$exactoption --numthreads=$numthreads $inputfile $outputfolder/$filename.preuchime.bnk";
		if (system("$command 2> $devnull 1> $devnull")) {
			&errorMessage(__LINE__, "Cannot run \"$command\".");
		}
	}
	# From here on the input files are referred to by their bank names.
	@inputfiles = @banknames;
}
print(STDERR "done.\n\n");

# The following steps refer to the banks by names relative to the output folder.
if (!chdir($outputfolder)) {
	&errorMessage(__LINE__, 'Cannot change working directory.');
}

if ($uchime) {
	# prepare for chimera detection
	# For every bank that contains contigs ("<name>.preuchime.bnk/CTG.ifo"
	# exists), the contigs are written to "<name>.preuchime.fasta" with the
	# number of member reads embedded in the header as "/ab=<n>/". Singleton
	# reads are not written. One child process is forked per bank and the
	# parent lets at most $numthreads + 1 of them run at the same time.
	print(STDERR "Preparing files for chimera detection...\n");
	{
		my $child = 0;
		$| = 1;
		$? = 0;
		foreach my $inputfile (@inputfiles) {
			my $pid = fork();
			# fork() returns undef on failure. Without this check the parent
			# itself would run the child code below and then exit.
			# NOTE(review): assumes errorMessage (defined outside this view) terminates the script.
			if (!defined($pid)) {
				&errorMessage(__LINE__, "Cannot fork a child process.");
			}
			if ($pid) {
				# parent: wait for one child when too many are running
				$child ++;
				if ($child >= ($numthreads + 1)) {
					if (wait == -1) {
						$child = 0;
					} else {
						$child --;
					}
				}
				if ($?) {
					&errorMessage(__LINE__, "The processes did not finished correctly.");
				}
				next;
			}
			# child: export the contigs of one bank, then exit
			if (-e "$inputfile.preuchime.bnk/CTG.ifo") {
				my %contigmembers;
				my $pipehandle;
				my $outputhandle;
				# get the members
				unless (open($pipehandle, "$listReadPlacedStatus $inputfile.preuchime.bnk 2> $devnull |")) {
					&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus $inputfile.preuchime.bnk\".");
				}
				while (<$pipehandle>) {
					s/\r?\n?$//;
					if (my @row = split(/\t/)) {
						if (scalar(@row) == 5 && $row[2] eq 'P') {
							# placed read: column 5 is the contig, column 2 the read
							push(@{$contigmembers{$inputfile}{$row[4]}}, $row[1]);
						}
						elsif (!(scalar(@row) == 4 && $row[2] eq 'S')) {
							# neither a placed read nor a singleton
							&errorMessage(__LINE__, "Invalid assemble results.\nBank: $inputfile.preuchime.bnk\nSequence: $row[1]\nContig: $row[4]\n");
						}
					}
				}
				close($pipehandle);
				if ($?) {
					&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus $inputfile.preuchime.bnk\".");
				}
				# contigs to preuchime
				unless (open($outputhandle, '>', "$inputfile.preuchime.fasta")) {
					&errorMessage(__LINE__, "Cannot write \"$inputfile.preuchime.fasta\".");
				}
				my @tempcontig = keys(%{$contigmembers{$inputfile}});
				if (@tempcontig) {
					unless (open($pipehandle, "$bank2fasta -b $inputfile.preuchime.bnk -iid 2> $devnull |")) {
						&errorMessage(__LINE__, "Cannot run \"$bank2fasta -b $inputfile.preuchime.bnk -iid\".");
					}
					while (<$pipehandle>) {
						if (/^>(\d+)/) {
							# rewrite the header to "contig_<iid>/ab=<number of reads>/"
							my $contignumber = $1;
							if ($contigmembers{$inputfile}{$contignumber}) {
								my $ab = sprintf("%.1f", scalar(@{$contigmembers{$inputfile}{$contignumber}}));
								print($outputhandle ">contig_$contignumber\/ab=$ab\/\n");
							}
							else {
								&errorMessage(__LINE__, "Invalid assemble results.\nBank: $inputfile.preuchime.bnk\nContig: $contignumber\n");
							}
						}
						else {
							print($outputhandle $_);
						}
					}
					close($pipehandle);
					if ($?) {
						&errorMessage(__LINE__, "Cannot run \"$bank2fasta -b $inputfile.preuchime.bnk -iid\".");
					}
				}
				close($outputhandle);
			}
			exit;
		}
		# join
		while (wait != -1) {
			if ($?) {
				&errorMessage(__LINE__, "The processes did not finished correctly.");
			}
		}
	}
	print(STDERR "done.\n\n");

	# run chimera detection in parallel
	# uchime is run on every "<name>.preuchime.fasta" and writes
	# "<name>.uchime.txt". One child process is forked per file and the
	# parent lets at most $numthreads of them run at the same time.
	print(STDERR "Running chimera detection by uchime at each file...\n");
	{
		my $child = 0;
		$| = 1;
		$? = 0;
		foreach my $inputfile (@inputfiles) {
			my $pid = fork();
			# fork() returns undef on failure. Without this check the parent
			# itself would run the child code below and then exit.
			# NOTE(review): assumes errorMessage (defined outside this view) terminates the script.
			if (!defined($pid)) {
				&errorMessage(__LINE__, "Cannot fork a child process.");
			}
			if ($pid) {
				# parent: wait for one child when too many are running
				$child ++;
				if ($child >= $numthreads) {
					if (wait == -1) {
						$child = 0;
					} else {
						$child --;
					}
				}
				if ($?) {
					&errorMessage(__LINE__, "The processes did not finished correctly.");
				}
				next;
			}
			# child: run uchime on one file, then exit
			print(STDERR "Processing $inputfile...\n");
			# files without contigs have no FASTA and are skipped
			if (-e "$inputfile.preuchime.fasta") {
				if (system("uchime --input $inputfile.preuchime.fasta --uchimeout $inputfile.uchime.txt$uchimeoption 2> $devnull 1> $devnull")) {
					&errorMessage(__LINE__, "Chimera detection by uchime was failed at \"$inputfile.preuchime.fasta\".");
				}
			}
			exit;
		}
		# join
		while (wait != -1) {
			if ($?) {
				&errorMessage(__LINE__, "The processes did not finished correctly.");
			}
		}
	}
	print(STDERR "done.\n\n");

	# delete chimeras
	# The UCHIME result of every bank is translated into a list of chimeric
	# read names, "<name>.chimericreads.txt": every member read of a contig
	# judged as chimeric is listed. The temporary "<name>.preuchime.fasta" is
	# removed. One child process is forked per bank and the parent lets at
	# most $numthreads + 1 of them run at the same time.
	print(STDERR "Deleting chimeras...\n");
	{
		my $child = 0;
		$| = 1;
		$? = 0;
		foreach my $inputfile (@inputfiles) {
			my $pid = fork();
			# fork() returns undef on failure. Without this check the parent
			# itself would run the child code below and then exit.
			# NOTE(review): assumes errorMessage (defined outside this view) terminates the script.
			if (!defined($pid)) {
				&errorMessage(__LINE__, "Cannot fork a child process.");
			}
			if ($pid) {
				# parent: wait for one child when too many are running
				$child ++;
				if ($child >= ($numthreads + 1)) {
					if (wait == -1) {
						$child = 0;
					} else {
						$child --;
					}
				}
				if ($?) {
					&errorMessage(__LINE__, "The processes did not finished correctly.");
				}
				next;
			}
			# child: process one bank, then exit
			if (-e "$inputfile.preuchime.bnk" && -e "$inputfile.preuchime.fasta" && -e "$inputfile.uchime.txt") {
				unlink("$inputfile.preuchime.fasta");
				my %contigmembers;
				my $pipehandle;
				# get the members
				unless (open($pipehandle, "$listReadPlacedStatus $inputfile.preuchime.bnk 2> $devnull |")) {
					&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus $inputfile.preuchime.bnk\".");
				}
				while (<$pipehandle>) {
					s/\r?\n?$//;
					if (my @row = split(/\t/)) {
						if (scalar(@row) == 5 && $row[2] eq 'P') {
							# placed read: column 5 is the contig, column 2 the read
							push(@{$contigmembers{$inputfile}{$row[4]}}, $row[1]);
						}
						elsif (!(scalar(@row) == 4 && $row[2] eq 'S')) {
							# neither a placed read nor a singleton
							&errorMessage(__LINE__, "Invalid assemble results.\nBank: $inputfile.preuchime.bnk\nSequence: $row[1]\nContig: $row[4]\n");
						}
					}
				}
				close($pipehandle);
				if ($?) {
					&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus $inputfile.preuchime.bnk\".");
				}
				my $inputhandle;
				# store chimera list
				my @chimericreads;
				if (-z "$inputfile.uchime.txt") {
					&errorMessage(__LINE__, "UCHIME output \"$inputfile.uchime.txt\" is invalid.");
				}
				unless (open($inputhandle, '<', "$inputfile.uchime.txt")) {
					&errorMessage(__LINE__, "Cannot open \"$inputfile.uchime.txt\".");
				}
				while (my $line = <$inputhandle>) {
					my @entry = split(/\t/, $line);
					# The last column starting with "Y" marks a chimera; the
					# second column is the query name including "/ab=<n>/".
					if ($entry[-1] =~ /^Y/ && $entry[1] =~ /^(.+)\/ab=\d+\.\d+\//) {
						my $seqname = $1;
						if ($seqname =~ /^contig_(\d+)/) {
							# a chimeric contig makes all of its member reads chimeric
							foreach my $member (@{$contigmembers{$inputfile}{$1}}) {
								push(@chimericreads, $member);
							}
						}
						else {
							push(@chimericreads, $seqname);
						}
					}
				}
				close($inputhandle);
				# The list file is only made when there is something to list.
				if (@chimericreads) {
					my $outputhandle;
					unless (open($outputhandle, '>', "$inputfile.chimericreads.txt")) {
						&errorMessage(__LINE__, "Cannot write \"$inputfile.chimericreads.txt\".");
					}
					foreach my $chimera (@chimericreads) {
						print($outputhandle $chimera . "\n");
					}
					close($outputhandle);
				}
			}
			exit;
		}
		# join
		while (wait != -1) {
			if ($?) {
				&errorMessage(__LINE__, "The processes did not finished correctly.");
			}
		}
	}
	print(STDERR "done.\n\n");
}

{
	# This step always runs; only the progress message depends on whether
	# noise detection is enabled.
	if ($denoise) {
		print(STDERR "Running noise detection...\n");
	}
	else {
		print(STDERR "Running additional computation...\n");
	}
	# Reads left as singletons in the merged bank.
	my @primarysingletons;
	# Contig of the merged bank => names of its member reads.
	my %primarycontigmembers;
	# Contig of the merged bank => number of member reads (filled only when
	# noise detection is enabled and the bank has contigs).
	my %primaryclustersize;
	# merge assembly
	# All per-file banks are assembled together into denoising1.bnk with the
	# same options as the first assams run.
	if (system("assams$exactoption --numthreads=$numthreads " . join('.preuchime.bnk ', @inputfiles) . ".preuchime.bnk denoising1.bnk 1> $devnull")) {
		&errorMessage(__LINE__, "Cannot run \"assams$exactoption --numthreads=$numthreads " . join('.preuchime.bnk ', @inputfiles) . ".preuchime.bnk denoising1.bnk\".");
	}
	# contigs to denoising1
	{
		my $pipehandle;
		# get the members
		unless (open($pipehandle, "$listReadPlacedStatus denoising1.bnk 2> $devnull |")) {
			&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus denoising1.bnk\".");
		}
		while (<$pipehandle>) {
			s/\r?\n?$//;
			if (my @row = split(/\t/)) {
				# 4 columns with status "S": singleton read
				if (scalar(@row) == 4 && $row[2] eq 'S') {
					push(@primarysingletons, $row[1]);
				}
				# 5 columns with status "P": read placed in the contig of column 5
				elsif (scalar(@row) == 5 && $row[2] eq 'P') {
					push(@{$primarycontigmembers{$row[4]}}, $row[1]);
				}
				else {
					&errorMessage(__LINE__, "Invalid assemble results.\nBank: denoising1.bnk\nSequence: $row[1]\nContig: $row[4]\n");
				}
			}
		}
		close($pipehandle);
		if ($?) {
			&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus denoising1.bnk\".");
		}
		my @tempcontig = keys(%primarycontigmembers);
		if (@tempcontig) {
			my $inputhandle;
			my $outputhandle;
			# Re-examine the UCHIME results across the input files using the
			# merged contigs.
			if ($falsepositive) {
				my %chimeric;
				# collect the chimeric read names of all input files
				foreach my $inputfile (@inputfiles) {
					if (-e "$inputfile.chimericreads.txt") {
						unless (open($inputhandle, "< $inputfile.chimericreads.txt")) {
							&errorMessage(__LINE__, "Cannot read \"$inputfile.chimericreads.txt\".");
						}
						while (<$inputhandle>) {
							if (/^(\S+)/) {
								$chimeric{$1} = 1;
							}
						}
						close($inputhandle);
					}
				}
				foreach my $primarycontig (@tempcontig) {
					my @tempchimeric;
					my $numchimeric = 0;
					my $numall = 0;
					my @tempnonchimeric;
					# split the members of this contig into chimeric and non-chimeric reads
					foreach my $seqname (@{$primarycontigmembers{$primarycontig}}) {
						if ($chimeric{$seqname}) {
							push(@tempchimeric, $seqname);
							$numchimeric ++;
						}
						else {
							push(@tempnonchimeric, $seqname);
						}
						$numall ++;
					}
					# enough chimeric members: the remaining members are treated as chimeric, too
					if ($minnpositive && $numchimeric >= $minnpositive) {
						foreach my $seqname (@tempnonchimeric) {
							$chimeric{$seqname} = 1;
						}
					}
					# too few chimeric members in a contig that also has non-chimeric
					# members: the chimeric calls are recorded as false positives and withdrawn
					elsif (($numchimeric < $minnpositive || $minnpositive == 0) && $numchimeric < $numall) {
						foreach my $seqname (@tempchimeric) {
							# the part of the read name after the first "__" is used as the file name prefix
							my $prefix = $seqname;
							$prefix =~ s/^.+?__//;
							unless (open($outputhandle, ">> $prefix.falsepositives.txt")) {
								&errorMessage(__LINE__, "Cannot write \"$prefix.falsepositives.txt\".");
							}
							print($outputhandle $seqname . "\n");
							close($outputhandle);
							delete($chimeric{$seqname});
						}
					}
				}
				# replace the per-file chimeric read lists by the revised ones
				foreach my $inputfile (@inputfiles) {
					unlink("$inputfile.chimericreads.txt");
				}
				foreach my $seqname (keys(%chimeric)) {
					my $prefix = $seqname;
					$prefix =~ s/^.+?__//;
					unless (open($outputhandle, ">> $prefix.chimericreads.txt")) {
						&errorMessage(__LINE__, "Cannot write \"$prefix.chimericreads.txt\".");
					}
					print($outputhandle $seqname . "\n");
					close($outputhandle);
				}
			}
			if ($denoise) {
				# Export the contigs of the merged bank with their quality values and
				# rename them from "<iid>" to "primarycontig_<iid>".
				if (system("$bank2fasta -b denoising1.bnk -iid -q temp1.fasta.qual > temp1.fasta 2> $devnull")) {
					&errorMessage(__LINE__, "Cannot run \"$bank2fasta -b denoising1.bnk -iid -q temp1.fasta.qual > temp1.fasta\".");
				}
				unless (open($inputhandle, "< temp1.fasta")) {
					&errorMessage(__LINE__, "Cannot read \"temp1.fasta\".");
				}
				unless (open($outputhandle, "> denoising1.fasta")) {
					&errorMessage(__LINE__, "Cannot write \"denoising1.fasta\".");
				}
				while (<$inputhandle>) {
					s/^>(\d+)(\r?\n?)$/>primarycontig_$1$2/;
					print($outputhandle $_);
				}
				close($outputhandle);
				close($inputhandle);
				# the same renaming for the quality file
				unless (open($inputhandle, "< temp1.fasta.qual")) {
					&errorMessage(__LINE__, "Cannot read \"temp1.fasta.qual\".");
				}
				unless (open($outputhandle, "> denoising1.fasta.qual")) {
					&errorMessage(__LINE__, "Cannot write \"denoising1.fasta.qual\".");
				}
				while (<$inputhandle>) {
					s/^>(\d+)(\r?\n?)$/>primarycontig_$1$2/;
					print($outputhandle $_);
				}
				close($outputhandle);
				close($inputhandle);
				unlink('temp1.fasta');
				unlink('temp1.fasta.qual');
				# the size of a cluster is the number of reads in the contig
				foreach my $primarycontig (@tempcontig) {
					$primaryclustersize{$primarycontig} = scalar(@{$primarycontigmembers{$primarycontig}});
				}
			}
		}
	}
	#foreach my $primarycontig (keys(%primarycontigmembers)) {
	#	foreach my $seqname (@{$primarycontigmembers{$primarycontig}}) {
	#		print("$primarycontig : $seqname\n");
	#	}
	#}
	#foreach my $primarycontig (keys(%primaryclustersize)) {
	#	print("size of $primarycontig : $primaryclustersize{$primarycontig}\n");
	#}
	#exit;
	# Noise detection: list the reads of small primary clusters as noisy.
	if ($denoise) {
		# A minimum clean cluster size of 0 requests automatic determination
		# of the threshold from a second, less strict assembly.
		if ($mincleanclustersize == 0) {
			# assemble denoising1 to denoising2
			if (-e 'denoising1.fasta' && -e 'denoising1.fasta.qual') {
				if (system("assams$assamsoption --numthreads=$numthreads denoising1.fasta denoising2.bnk 1> $devnull")) {
					&errorMessage(__LINE__, "Cannot run \"assams$assamsoption --numthreads=$numthreads denoising1.fasta denoising2.bnk\".");
				}
				unlink('denoising1.fasta');
				unlink('denoising1.fasta.qual');
			}
			# read assemble result
			# secondary contig => numbers of the primary contigs merged into it
			my %secondarycontigmembers;
			{
				my $pipehandle;
				# get the members
				unless (open($pipehandle, "$listReadPlacedStatus denoising2.bnk 2> $devnull |")) {
					&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus denoising2.bnk\".");
				}
				while (<$pipehandle>) {
					s/\r?\n?$//;
					if (my @row = split(/\t/)) {
						if (scalar(@row) == 4 && $row[2] eq 'S') {
							# primary contig that was not merged; not used for the threshold
							next;
						}
						elsif (scalar(@row) == 5 && $row[2] eq 'P' && $row[1] =~ /^primarycontig_(\d+)$/) {
							push(@{$secondarycontigmembers{$row[4]}}, $1);
						}
						else {
							&errorMessage(__LINE__, "Invalid assemble results.\nBank: denoising2.bnk\nSequence: $row[1]\nContig: $row[4]\n");
						}
					}
				}
				close($pipehandle);
				if ($?) {
					&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus denoising2.bnk\".");
				}
			}
			# determine threshold
			{
				# Sizes of all primary clusters that are not the largest one
				# of their secondary cluster.
				my @primaryclustersize;
				foreach my $secondarycontig (keys(%secondarycontigmembers)) {
					my @tempclustersize;
					foreach my $primarycontig (@{$secondarycontigmembers{$secondarycontig}}) {
						push(@tempclustersize, $primaryclustersize{$primarycontig});
					}
					# drop the largest primary cluster of this secondary cluster
					@tempclustersize = sort({$b <=> $a} @tempclustersize);
					shift(@tempclustersize);
					push(@primaryclustersize, @tempclustersize);
				}
				@primaryclustersize = sort({$a <=> $b} @primaryclustersize);
				# The threshold is the size found at the relative position
				# $pnoisycluster of the sorted list. The index is limited to
				# the last element (a rate of 1 or more would point behind the
				# list), and with an empty list the threshold stays 0 instead
				# of becoming undefined.
				if (@primaryclustersize) {
					my $index = int(scalar(@primaryclustersize) * $pnoisycluster);
					if ($index > $#primaryclustersize) {
						$index = $#primaryclustersize;
					}
					$mincleanclustersize = $primaryclustersize[$index];
				}
			}
		}
		# save sequence names for elimination
		{
			my $outputhandle;
			# Every read of a primary cluster smaller than the threshold is
			# appended to the noisy read list of its own sample. The sample
			# name is the part of the read name after the first "__".
			foreach my $primarycontig (keys(%primarycontigmembers)) {
				if ($primaryclustersize{$primarycontig} < $mincleanclustersize) {
					foreach my $seqname (@{$primarycontigmembers{$primarycontig}}) {
						my $prefix = $seqname;
						$prefix =~ s/^.+?__//;
						unless (open($outputhandle, '>>', "$prefix.noisyreads.txt")) {
							&errorMessage(__LINE__, "Cannot write \"$prefix.noisyreads.txt\".");
						}
						print($outputhandle $seqname . "\n");
						close($outputhandle);
					}
				}
			}
			# Reads left as singletons in denoising1.bnk are recorded in both
			# the chimeric and the noisy read list.
			foreach my $seqname (@primarysingletons) {
				my $prefix = $seqname;
				$prefix =~ s/^.+?__//;
				foreach my $listfile ("$prefix.chimericreads.txt", "$prefix.noisyreads.txt") {
					unless (open($outputhandle, '>>', $listfile)) {
						&errorMessage(__LINE__, "Cannot write \"$listfile\".");
					}
					print($outputhandle $seqname . "\n");
					close($outputhandle);
				}
			}
			# record the threshold that was actually used
			unless (open($outputhandle, '>', 'denoising.txt')) {
				&errorMessage(__LINE__, "Cannot write \"denoising.txt\".");
			}
			print($outputhandle "minimum clean cluster size: $mincleanclustersize\n");
			close($outputhandle);
		}
	}
	print(STDERR "done.\n\n");
}

# delete chimeric and/or noisy sequences
# The read lists made above are loaded, makeAFGGZ is called for each enabled
# mode, and (if FASTA output is enabled) the reads of every input bank are
# written to FASTA and quality files without the listed reads:
#   *.chimeraremoved.* (chimera detection), *.denoised.* (noise detection),
#   *.cleaned.* (both).
# Only the files of the enabled modes are opened and written. Printing to the
# handle of a disabled mode, which was never opened, is a fatal error.
print(STDERR "Deleting chimeric and/or noisy sequences...\n");
{
	my %chimeric;
	my %noisy;
	my %notclean;
	# Adds the first word of every line of the given list file to the given
	# hashes. A missing list file is skipped.
	my $readnames = sub {
		my ($listfile, @hashes) = @_;
		if (!-e $listfile) {
			return;
		}
		my $inputhandle;
		unless (open($inputhandle, '<', $listfile)) {
			&errorMessage(__LINE__, "Cannot read \"$listfile\".");
		}
		while (my $line = <$inputhandle>) {
			if ($line =~ /^(\S+)/) {
				my $seqname = $1;
				foreach my $hash (@hashes) {
					$hash->{$seqname} = 1;
				}
			}
		}
		close($inputhandle);
	};
	# read the results of chimera detection
	if ($uchime) {
		foreach my $inputfile (@inputfiles) {
			$readnames->("$inputfile.chimericreads.txt", \%chimeric, \%notclean);
		}
		# NOTE(review): makeAFGGZ is defined outside this view; presumably it writes
		# the bank as gzipped AFG without the given reads - confirm.
		&makeAFGGZ('denoising1.bnk', 'chimeraremoved.afg.gz', \%chimeric);
	}
	# read the results of noisy reads detection
	if ($denoise) {
		foreach my $inputfile (@inputfiles) {
			$readnames->("$inputfile.noisyreads.txt", \%noisy, \%notclean);
		}
		&makeAFGGZ('denoising1.bnk', 'denoised.afg.gz', \%noisy);
	}
	# output clean afg.gz
	if ($denoise && $uchime) {
		&makeAFGGZ('denoising1.bnk', 'cleaned.afg.gz', \%notclean);
	}
	# output FASTA
	if ($savefasta) {
		# Outputs of the enabled modes: [file name part, reads to leave out]
		my @outputs;
		if ($uchime) {
			push(@outputs, ['chimeraremoved', \%chimeric]);
		}
		if ($denoise) {
			push(@outputs, ['denoised', \%noisy]);
		}
		if ($uchime && $denoise) {
			push(@outputs, ['cleaned', \%notclean]);
		}
		foreach my $inputfile (@inputfiles) {
			# first pass: sequences, second pass: quality values (-q)
			foreach my $pass (['', 'fasta'], [' -q', 'fasta.qual']) {
				my ($dumpoption, $extension) = @{$pass};
				my $command = "$dumpreads -r -e$dumpoption $inputfile.preuchime.bnk";
				my @outputhandles;
				foreach my $output (@outputs) {
					my $outputfile = "$inputfile.$output->[0].$extension";
					my $outputhandle;
					unless (open($outputhandle, '>', $outputfile)) {
						&errorMessage(__LINE__, "Cannot write \"$outputfile\".");
					}
					push(@outputhandles, $outputhandle);
				}
				my $pipehandle;
				unless (open($pipehandle, "$command 2> $devnull |")) {
					&errorMessage(__LINE__, "Cannot run \"$command\".");
				}
				# One flag per output: true while the current record has to
				# be left out. Lines before the first header are left out.
				my @skip = (1) x scalar(@outputs);
				while (my $line = <$pipehandle>) {
					if ($line =~ /^>(\S+)\r?\n?/) {
						my $seqname = $1;
						for (my $i = 0; $i < scalar(@outputs); $i ++) {
							$skip[$i] = $outputs[$i][1]{$seqname} ? 1 : 0;
						}
					}
					for (my $i = 0; $i < scalar(@outputs); $i ++) {
						if (!$skip[$i]) {
							print({$outputhandles[$i]} $line);
						}
					}
				}
				close($pipehandle);
				if ($?) {
					&errorMessage(__LINE__, "Cannot run \"$command\".");
				}
				foreach my $outputhandle (@outputhandles) {
					close($outputhandle);
				}
			}
		}
	}
}
print(STDERR "done.\n\n");

# delete temporary files
print(STDERR "Deleting temporary files...\n");
if ($uchime) {
	# Remove every "<inputfile>.preuchime.bnk" directory, using one forked
	# child process per input file.
	my $child = 0;
	# enable autoflush on the currently selected output handle before forking
	$| = 1;
	$? = 0;
	foreach my $inputfile (@inputfiles) {
		my $pid = fork();
		# fork() returns undef when no process could be created. Without this
		# check the parent itself would run the child branch below and exit
		# silently with status 0.
		unless (defined($pid)) {
			&errorMessage(__LINE__, "Cannot fork a process for deleting \"$inputfile.preuchime.bnk\".");
		}
		if ($pid) {
			# parent: count the running children and, once the limit is
			# reached, wait for one of them before forking the next
			$child ++;
			if ($child >= ($numthreads + 1)) {
				if (wait == -1) {
					$child = 0;
				} else {
					$child --;
				}
			}
			# $? holds the status of the child reaped by wait (0 if none was reaped)
			if ($?) {
				&errorMessage(__LINE__, "The processes did not finished correctly.");
			}
			next;
		}
		else {
			# child: delete bank
			while (glob("$inputfile.preuchime.bnk/*")) {
				unlink;
			}
			rmdir("$inputfile.preuchime.bnk");
			# delete bank
			#while (glob("$inputfile.postuchime.bnk/*")) {
			#	unlink;
			#}
			#rmdir("$inputfile.postuchime.bnk");
			exit;
		}
	}
	# join: reap the remaining children and check their exit statuses
	while (wait != -1) {
		if ($?) {
			&errorMessage(__LINE__, "The processes did not finished correctly.");
		}
	}
}
if ($denoise) {
	# Both denoising banks are removed the same way: delete the entries
	# matched by "<bank>/*", then remove the directory itself.
	foreach my $bank ('denoising1.bnk', 'denoising2.bnk') {
		# disabled: export the bank as a gzip-compressed AFG file before deleting it
		# (denoising1.bnk -> denoising1.afg.gz, denoising2.bnk -> denoising2.afg.gz)
		#if (system("$bankreport -p -b $bank 2> $devnull | gzip -9 -c > <bank name>.afg.gz 2> $devnull")) {
		#	&errorMessage(__LINE__, "Cannot run $bankreport -p|gzip for \"$bank\".");
		#}
		# delete bank
		unlink(glob("$bank/*"));
		rmdir($bank);
	}
}
print(STDERR "done.\n\n");

chdir('..');

# Write a gzip-compressed AFG dump of a bank, leaving out unwanted reads.
#
# Arguments:
#   $inputbank   - bank handed to $listReadPlacedStatus and $bankreport
#   $outputafggz - name of the output file (written through "gzip -9 -c")
#   $notclean    - reference to a hash whose keys are the names of the reads to
#                  leave out (compared with column 2 of the placement list and
#                  with the "eid:" field of RED records)
#
# A contig (CTG) record is written only when at least $minthreshold of its
# placed reads are not in the hash; the threshold is 1 for
# 'chimeraremoved.afg.gz' and $mincleanclustersize for any other output name.
# The tiles (TLE) of left-out reads are cut from a written contig. A read
# (RED) record is written unless the read is in the hash or was listed with
# status 'S'. Every failure is reported through errorMessage, which terminates
# the script.
sub makeAFGGZ {
	my $inputbank = shift(@_);
	my $outputafggz = shift(@_);
	# The hash is only read here, so it is used through the reference instead
	# of being copied.
	my $notcleanref = shift(@_);
	my %cleancontigs;
	my %singletons;
	my %contig2notclean;
	my $pipehandlein;
	# explore clean contigs
	unless (open($pipehandlein, "$listReadPlacedStatus $inputbank 2> $devnull |")) {
		&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus $inputbank\".");
	}
	while (<$pipehandlein>) {
		s/\r?\n?$//;
		if (my @row = split(/\t/)) {
			# 4 columns with status 'S': remember the read (column 2) as a singleton
			if (scalar(@row) == 4 && $row[2] eq 'S') {
				$singletons{$row[1]} = 1;
			}
			# 5 columns with status 'P': column 5 names the contig of the read
			elsif (scalar(@row) == 5 && $row[2] eq 'P') {
				if (!$notcleanref->{$row[1]}) {
					$cleancontigs{$row[4]} ++;
				}
				else {
					# column 1 is what the "src:" field of the tiles is matched against below
					push(@{$contig2notclean{$row[4]}}, $row[0]);
				}
			}
			#else {
			#	&errorMessage(__LINE__, "Invalid assemble results.\nBank: $inputbank\nSequence: $row[1]\nContig: $row[4]\n");
			#}
		}
	}
	close($pipehandlein);
	if ($?) {
		&errorMessage(__LINE__, "Cannot run \"$listReadPlacedStatus $inputbank\".");
	}
	my $pipehandleout;
	# output clean afg file
	unless (open($pipehandlein, "$bankreport -p -b $inputbank 2> $devnull |")) {
		&errorMessage(__LINE__, "Cannot run \"$bankreport -p -b $inputbank\".");
	}
	unless (open($pipehandleout, "| gzip -9 -c > $outputafggz 2> $devnull")) {
		&errorMessage(__LINE__, "Cannot run \"gzip -9 -c > $outputafggz\".");
	}
	my $minthreshold;
	if ($outputafggz eq 'chimeraremoved.afg.gz') {
		$minthreshold = 1;
	}
	else {
		$minthreshold = $mincleanclustersize;
	}
	# $temp collects one complete top-level CTG or RED record, including its
	# nested sub-records; $nesting is the current brace depth.
	my $temp;
	my $nesting = 0;
	my $contig;
	my $read;
	while (<$pipehandlein>) {
		if ($nesting && /^\}/) {
			$nesting --;
		}
		elsif (/^\{([A-Z]+)/) {
			$nesting ++;
			if ($1 eq 'CTG') {
				$contig ++;
			}
			elsif ($1 eq 'RED') {
				$read ++;
			}
		}
		if ($contig || $read) {
			$temp .= $_;
		}
		# a record is complete when the brace depth is back at zero
		if ($temp && $nesting == 0) {
			if ($contig && $temp =~ /^iid:(\d+)/m && $cleancontigs{$1} >= $minthreshold) {
				foreach my $notcleanread (@{$contig2notclean{$1}}) {
					# The id must be followed by whitespace: otherwise "src:12"
					# would also match the tile of read 123 and cut the wrong
					# tile. \Q...\E keeps the id from being read as a pattern.
					$temp =~ s/\{TLE[^\{\}]+?src:\Q$notcleanread\E(?=\s)[^\{\}]+?\}//s;
				}
				print($pipehandleout $temp);
			}
			elsif ($contig && $temp =~ /^iid:(\d+)/m && exists($cleancontigs{$1}) && $cleancontigs{$1} < $minthreshold && $falsepositive) {
				&errorMessage(__LINE__, "Invalid assemble results.\nBank: $inputbank\nContig: $1\n");
			}
			elsif ($read && $temp =~ /^eid:(\S+)/m && !exists($notcleanref->{$1}) && !exists($singletons{$1})) {
				print($pipehandleout $temp);
			}
			undef($temp);
			undef($contig);
			undef($read);
		}
	}
	# closing a pipe waits for the command and puts its exit status into $?
	close($pipehandleout);
	if ($?) {
		&errorMessage(__LINE__, "Cannot run \"gzip -9 -c > $outputafggz\".");
	}
	close($pipehandlein);
	if ($?) {
		&errorMessage(__LINE__, "Cannot run \"$bankreport -p -b $inputbank\".");
	}
}

# Report a fatal error on STDERR and terminate the script with exit status 1.
#   $lineno  - line number to show in the report (callers pass __LINE__)
#   $message - description of what went wrong
sub errorMessage {
	my ($lineno, $message) = @_;
	my $report = join('',
		"ERROR!: line $lineno\n$message\n",
		"If you want to read help message, run this script without options.\n",
	);
	print(STDERR $report);
	exit(1);
}

# Print the usage text to STDERR and terminate the script with exit status 0.
sub helpMessage {
	# The text contains nothing to interpolate, so it is kept as a literal.
	my $usage = <<'_USAGE';
Usage
=====
clcleanseq options inputfiles outputfolder

Command line options
====================
assams options end
  Specify commandline options for assams in automatic mode.
(default: --minnseq=1000 --minident=0.995 --minovllen=100 --maxnhits=0
 --strand=plus --assemblemode=u+g --mergemode=normal --alnwiggle=11)

uchime options end
  Specify commandline options for uchime.
(default: --minh 0.1 --mindiv 0.8)

--mincleanclustersize=INTEGER
  Specify minimum size of clean cluster. 0 means automatically
determined (but this is time-consuming). (default: 5)

--detectmode=NOISE|CHIMERA|N+C
  Specify detect mode. (default: N+C)

--pnoisycluster=DECIMAL
  Specify the percentage of noisy cluster. (default: 0.8)

--checkfalsepositive=ENABLE|DISABLE
  Specify whether check false positive of chimera detection or not.
(default: ENABLE)

--minnpositive=INTEGER
  The OTU that consists of this number of reads will be treated as true
positive in chimera detection. 0 means all reads. (default: 1)

--savefasta=ENABLE|DISABLE
  Specify whether output FASTA file or not. (default: ENABLE)

-n, --numthreads=INTEGER
  Specify the number of processes. (default: 1)

Acceptable input file formats
=============================
FASTA (+.qual)
_USAGE
	print(STDERR $usage);
	exit(0);
}
