use strict;
use Fcntl qw(:flock);

my $buildno = '0.1.2013.03.14';

print(STDERR <<"_END");
clsplitseq $buildno
=======================================================================

Official web site of this script is
http://www.fifthdimension.jp/products/claident/ .
To know script details, see above URL.

Copyright (C) 2011-2013  Akifumi S. Tanabe

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

_END

# display usage if command line options were not specified
unless (@ARGV) {
	&helpMessage();
}

# initialize variables
my $outputfolder = $ARGV[-1];
my $inputfile = $ARGV[-2];
unless (-e $inputfile) {
	&errorMessage(__LINE__, "\"$inputfile\" does not exist.");
}
my $qualfile;

my $runname = $inputfile;
$runname =~ s/\.[^\.]+$//;

my $append;
my $primerfile;
my $threeprimeprimerfile;
my $tagfile;
my $minlen = 1;
my $maxlen;
my $minqual;
my $minqualtag;
my $replaceinternal;
my %keywords;
my %ngwords;
my $converse;
my $maxpmismatch = 0.14;
my $maxnmismatch;
my $threeprimemaxpmismatch = 0.15;
my $threeprimemaxnmismatch;
my $needthreeprimeprimer;
my $goscore = -10;
my $gescore = -1;
my $mmscore = -4;
my $mscore = 5;
my $endgap = 'nobody';
my $numthreads = 1;
for (my $i = 0; $i < scalar(@ARGV) - 2; $i ++) {
	if ($ARGV[$i] =~ /^-+(?:keyword|keywords|k)=(.+)$/i) {
		my $keywords = $1;
		foreach my $keyword (split(/,/, $keywords)) {
			$keywords{$keyword} = 1;
		}
	}
	elsif ($ARGV[$i] =~ /^-+(?:ngword|ngwords|n)=(.+)$/i) {
		my $ngwords = $1;
		foreach my $ngword (split(/,/, $ngwords)) {
			$ngwords{$ngword} = 1;
		}
	}
	elsif ($ARGV[$i] =~ /^-+(?:c|converse)$/i) {
		$converse = 1;
	}
	elsif ($ARGV[$i] =~ /^-+max(?:imum)?(?:r|rate|p|percentage)mismatch=(.+)$/i) {
		$maxpmismatch = $1;
	}
	elsif ($ARGV[$i] =~ /^-+max(?:imum)?n(?:um)?mismatch=(.+)$/i) {
		$maxnmismatch = $1;
	}
	elsif ($ARGV[$i] =~ /^-+(?:primer|primerfile|p)=(.+)$/i) {
		$primerfile = $1;
	}
	elsif ($ARGV[$i] =~ /^-+3primemax(?:imum)?(?:r|rate|p|percentage)mismatch=(.+)$/i) {
		$threeprimemaxpmismatch = $1;
	}
	elsif ($ARGV[$i] =~ /^-+3primemax(?:imum)?n(?:um)?mismatch=(.+)$/i) {
		$threeprimemaxnmismatch = $1;
	}
	elsif ($ARGV[$i] =~ /^-+3prime(?:primer|primerfile)=(.+)$/i) {
		$threeprimeprimerfile = $1;
	}
	elsif ($ARGV[$i] =~ /^-+need3primeprimer$/i) {
		$needthreeprimeprimer = 1;
	}
	elsif ($ARGV[$i] =~ /^-+(?:tag|tagfile|t)=(.+)$/i) {
		$tagfile = $1;
	}
	elsif ($ARGV[$i] =~ /^-+(?:qual|qualfile|q)=(.+)$/i) {
		$qualfile = $1;
	}
	elsif ($ARGV[$i] =~ /^-+min(?:imum)?len(?:gth)?=(\d+)$/i) {
		$minlen = $1;
	}
	elsif ($ARGV[$i] =~ /^-+max(?:imum)?len(?:gth)?=(\d+)$/i) {
		$maxlen = $1;
	}
	elsif ($ARGV[$i] =~ /^-+min(?:imum)?qual(?:ity)?=(\d+)$/i) {
		$minqual = $1;
	}
	elsif ($ARGV[$i] =~ /^-+min(?:imum)?qual(?:ity)?tag=(\d+)$/i) {
		$minqualtag = $1;
	}
	elsif ($ARGV[$i] =~ /^-+replaceinternal$/i) {
		$replaceinternal = 1;
	}
	elsif ($ARGV[$i] =~ /^-+g(?:ap)?o(?:pen)?(?:score)?=(-?\d+)$/i) {
		$goscore = $1;
	}
	elsif ($ARGV[$i] =~ /^-+g(?:ap)?e(?:xtension)?(?:score)?=(-?\d+)$/i) {
		$gescore = $1;
	}
	elsif ($ARGV[$i] =~ /^-+m(?:is)?m(?:atch)?(?:score)?=(-?\d+)$/i) {
		$mmscore = $1;
	}
	elsif ($ARGV[$i] =~ /^-+m(?:atch)?(?:score)?=(-?\d+)$/i) {
		$mscore = $1;
	}
	elsif ($ARGV[$i] =~ /^-+endgap=(nobody|match|mismatch|gap)$/i) {
		$endgap = lc($1);
	}
	elsif ($ARGV[$i] =~ /^-+(?:a|append)$/i) {
		$append = 1;
	}
	elsif ($ARGV[$i] =~ /^-+runname=(.+)$/i) {
		$runname = $1;
	}
	elsif ($ARGV[$i] =~ /^-+(?:n|n(?:um)?threads?)=(\d+)$/i) {
		$numthreads = $1;
	}
	else {
		&errorMessage(__LINE__, "\"$ARGV[$i]\" is unknown option.");
	}
}
if (-e $outputfolder && !$append) {
	&errorMessage(__LINE__, "\"$outputfolder\" already exists.");
}
if ($primerfile && !-e $primerfile) {
	&errorMessage(__LINE__, "\"$primerfile\" does not exist.");
}
if ($threeprimeprimerfile && !-e $threeprimeprimerfile) {
	&errorMessage(__LINE__, "\"$threeprimeprimerfile\" does not exist.");
}
if ($threeprimeprimerfile && !$primerfile) {
	&errorMessage(__LINE__, "3' primers require associated 5' primers.");
}
if ($needthreeprimeprimer && !$threeprimeprimerfile) {
	&errorMessage(__LINE__, "Although \"need3'primer\" is specified, 3'primer does not given.");
}
if ($tagfile && !-e $tagfile) {
	&errorMessage(__LINE__, "\"$tagfile\" does not exist.");
}
if (!$tagfile && !$primerfile) {
	&errorMessage(__LINE__, "Both tag and primer are not given.");
}
unless ($qualfile) {
	$qualfile = "$inputfile.qual";
}
if ($runname =~ /__/) {
	&errorMessage(__LINE__, "\"$runname\" is invalid name. Do not use \"__\" in run name.");
}
if ($runname =~ /\s/) {
	&errorMessage(__LINE__, "\"$runname\" is invalid name. Do not use spaces or tabs in run name.");
}
if ($minqual && !$minqualtag) {
	$minqualtag = $minqual;
}
if ($minlen < 1) {
	&errorMessage(__LINE__, "Minimum length must be equal to or more than 1.");
}

my %primer;
my @primer;
if ($primerfile) {
	my $primerfilehandle;
	unless (open($primerfilehandle, "< $primerfile")) {
		&errorMessage(__LINE__, "Cannot open \"$primerfile\".");
	}
	local $/ = "\n>";
	while (<$primerfilehandle>) {
		if (/^>?\s*(\S[^\r\n]*)\r?\n(.+)\r?\n/s) {
			my $name = $1;
			my $primer = uc($2);
			$name =~ s/\s+$//;
			if ($name =~ /__/) {
				&errorMessage(__LINE__, "\"$name\" is invalid name. Do not use \"__\" in primer name.");
			}
			$primer =~ s/[^A-Z]//sg;
			if (exists($primer{$primer})) {
				&errorMessage(__LINE__, "\"$name\" is same as \"" . $primer{$primer} . "\".");
			}
			else {
				$primer{$primer} = $name;
			}
			push(@primer, $primer);
		}
	}
	close($primerfilehandle);
	print(STDERR "Primers\n");
	foreach (@primer) {
		print(STDERR "$primer{$_}: $_\n");
	}
}

my %threeprimeprimer;
if ($threeprimeprimerfile) {
	my $threeprimeprimerfilehandle;
	unless (open($threeprimeprimerfilehandle, "< $threeprimeprimerfile")) {
		&errorMessage(__LINE__, "Cannot open \"$threeprimeprimerfile\".");
	}
	my $tempno = 0;
	my @threeprimeprimername;
	local $/ = "\n>";
	while (<$threeprimeprimerfilehandle>) {
		if (/^>?\s*(\S[^\r\n]*)\r?\n(.+)\r?\n/s) {
			my $name = $1;
			my $threeprimeprimer = uc($2);
			$name =~ s/\s+$//;
			push(@threeprimeprimername, $name);
			$threeprimeprimer =~ s/[^A-Z]//sg;
			if ($primer[$tempno]) {
				$threeprimeprimer{$primer[$tempno]} = $threeprimeprimer;
			}
			else {
				&errorMessage(__LINE__, "There is no associated 5' primer of \"$name\".");
			}
			$tempno ++;
		}
	}
	close($threeprimeprimerfilehandle);
	print(STDERR "End Primers\n");
	for (my $i = 0; $i < scalar(@threeprimeprimername); $i ++) {
		print(STDERR $threeprimeprimername[$i] . ": " . $threeprimeprimer{$primer[$i]} . "\n");
	}
}

my %tag;
my $taglength;
if ($tagfile) {
	my $tagfilehandle;
	unless (open($tagfilehandle, "< $tagfile")) {
		&errorMessage(__LINE__, "Cannot open \"$tagfile\".");
	}
	local $/ = "\n>";
	while (<$tagfilehandle>) {
		if (/^>?\s*(\S[^\r\n]*)\r?\n(.+)\r?\n/s) {
			my $name = $1;
			my $tag = uc($2);
			$name =~ s/\s+$//;
			if ($name =~ /__/) {
				&errorMessage(__LINE__, "\"$name\" is invalid name. Do not use \"__\" in tag name.");
			}
			$tag =~ s/[^A-Z]//sg;
			if (exists($tag{$tag})) {
				&errorMessage(__LINE__, "The tag \"$tag\" is multiply specified.");
			}
			else {
				$tag{$tag} = $name;
			}
			if ($taglength && $taglength != length($tag)) {
				&errorMessage(__LINE__, "Invalid tag file.");
			}
			else {
				$taglength = length($tag);
			}
		}
	}
	close($tagfilehandle);
	print(STDERR "Tags\n");
	foreach (keys(%tag)) {
		print(STDERR "$tag{$_}: $_\n");
	}
}
print(STDERR "\n");

print(STDERR "Splitting...\n");
# read input file
my $inputhandle;
unless (open($inputhandle, "< $inputfile")) {
	&errorMessage(__LINE__, "Cannot open \"$inputfile\".");
}
my $qualhandle;
if (-e $qualfile) {
	unless (open($qualhandle, "< $qualfile")) {
		&errorMessage(__LINE__, "Cannot open \"$qualfile\".");
	}
}
else {
	&errorMessage(__LINE__, "There is no quality file.");
}
if (!-e $outputfolder && !mkdir($outputfolder)) {
	&errorMessage(__LINE__, "Cannot make output folder.");
}
{
	my $child = 0;
	$| = 1;
	$? = 0;
	local $/ = "\n>";
	while (<$inputhandle>) {
		if (/^>?\s*(\S[^\r\n]*)\r?\n(.+)\r?\n/s) {
			my $taxon = $1;
			my $sequence = $2;
			$taxon =~ s/\s+$//;
			$sequence =~ s/[> \r\n]//g;
			if ($taxon =~ /__/) {
				&errorMessage(__LINE__, "\"$taxon\" is invalid name. Do not use \"__\" in sequence name.");
			}
			my @qual;
			{
				local $/ = "\n>";
				my $qualline = readline($qualhandle);
				if ($qualline =~ /^>?\s*(\S[^\r\n]*)\r?\n(.+)\r?\n/s) {
					my $temp = $1;
					my $qual = $2;
					$temp =~ s/\s+$//;
					if ($temp eq $taxon) {
						@qual = $qual =~ /\d+/g;
					}
				}
				else {
					&errorMessage(__LINE__, "The quality file is invalid.");
				}
			}
			if (my $pid = fork()) {
				$child ++;
				if ($child == $numthreads) {
					if (wait == -1) {
						$child = 0;
					} else {
						$child --;
					}
				}
				if ($?) {
					&errorMessage(__LINE__);
				}
				next;
			}
			else {
				if (%keywords || %ngwords) {
					my $out = 1;
					foreach my $keyword (keys(%keywords)) {
						if ($taxon !~ /$keyword/i) {
							$out = 0;
						}
					}
					foreach my $ngword (keys(%ngwords)) {
						if ($taxon =~ /$ngword/i) {
							$out = 0;
						}
					}
					if (!$out && !$converse || $out && $converse) {
						exit;
					}
				}
				my @seq = $sequence =~ /\S/g;
				# skip short or long sequence
				if ($minlen && scalar(@seq) < $minlen) {
					exit;
				}
				if ($maxlen && scalar(@seq) > $maxlen) {
					exit;
				}
				my $matchornot;
				my $seq = join('', @seq);
				if (%tag && $minqualtag) {
					my $num = 0;
					for (my $i = 0; $i < $taglength; $i ++) {
						if ($qual[$i] < $minqualtag) {
							$num ++;
							last;
						}
					}
					if ($num) {
						exit;
					}
				}
				if (%primer && %tag) {
					foreach my $primer (@primer) {
						if ($seq =~ /^(.{$taglength})(.+)$/ && $tag{$1} && length($2) >= $minlen + length($primer)) {
							my $tag = $1;
							my $tempseq = $2;
							my ($pmismatch, $nmismatch, $overhang) = &searchFiveprimePrimer($primer, substr($tempseq, 0, length($primer) * 2, ''));
							if ((!defined($maxnmismatch) || $nmismatch <= $maxnmismatch) && $pmismatch <= $maxpmismatch) {
								$taxon .= '__' . $runname . '__' . $tag{$tag} . '__' . $primer{$primer};
								for (my $i = 0; $i < $taglength + length($primer) * 2 - length($overhang); $i ++) {
									shift(@qual);
								}
								@seq = split(//, $overhang);
								push(@seq, split(//, $tempseq));
								$matchornot = 1;
								if (%threeprimeprimer) {
									my ($startpos, $endpos, $threeprimepmismatch, $threeprimenmismatch) = &searchThreeprimePrimer($threeprimeprimer{$primer}, join('', @seq));
									if ((!defined($threeprimemaxnmismatch) || $threeprimenmismatch <= $threeprimemaxnmismatch) && $threeprimepmismatch <= $threeprimemaxpmismatch && $startpos > 0) {
										splice(@seq, $startpos);
										splice(@qual, $startpos);
									}
									elsif ($needthreeprimeprimer) {
										undef($matchornot);
									}
								}
								if ($matchornot) {
									last;
								}
							}
						}
					}
				}
				elsif (%primer) {
					foreach my $primer (@primer) {
						if (length($seq) >= $minlen + length($primer)) {
							my ($pmismatch, $nmismatch, $overhang) = &searchFiveprimePrimer($primer, substr($seq, 0, length($primer) * 2, ''));
							if ((!defined($maxnmismatch) || $nmismatch <= $maxnmismatch) && $pmismatch <= $maxpmismatch) {
								$taxon .= '__' . $runname . '__' . $primer{$primer};
								for (my $i = 0; $i < length($primer) * 2 - length($overhang); $i ++) {
									shift(@qual);
								}
								@seq = split(//, $overhang);
								push(@seq, split(//, $seq));
								$matchornot = 1;
								if (%threeprimeprimer) {
									my ($startpos, $endpos, $threeprimepmismatch, $threeprimenmismatch) = &searchThreeprimePrimer($threeprimeprimer{$primer}, join('', @seq));
									if ((!defined($threeprimemaxnmismatch) || $threeprimenmismatch <= $threeprimemaxnmismatch) && $threeprimepmismatch <= $threeprimemaxpmismatch && $startpos > 0) {
										splice(@seq, $startpos);
										splice(@qual, $startpos);
									}
									elsif ($needthreeprimeprimer) {
										undef($matchornot);
									}
								}
								if ($matchornot) {
									last;
								}
							}
						}
					}
				}
				elsif (%tag) {
					if ($seq =~ /^(.{$taglength})(.+)$/ && $tag{$1} && length($2) >= $minlen) {
						my $tag = $1;
						my $tempseq = $2;
						$taxon .= '__' . $runname . '__' . $tag{$tag};
						for (my $i = 0; $i < $taglength; $i ++) {
							shift(@qual);
						}
						@seq = split(//, $tempseq);
						$matchornot = 1;
					}
				}
				unless ($matchornot) {
					exit;
				}
				# skip short sequence
				if ($minlen && scalar(@seq) < $minlen) {
					exit;
				}
				# mask based on quality value
				if ($minqual) {
					# mask end-side characters
					while ($qual[-1] < $minqual) {
						pop(@qual);
						pop(@seq);
					}
					# mask internal characters
					if ($replaceinternal) {
						my @pos;
						for (my $i = 0; $i < scalar(@qual); $i ++) {
							if ($qual[$i] < $minqual) {
								push(@pos, $i);
							}
						}
						foreach my $pos (@pos) {
							splice(@seq, $pos, 1, '?');
						}
					}
				}
				# skip short sequence
				if ($minlen && scalar(@seq) < $minlen) {
					exit;
				}
				# output an entry
				my $outputhandle;
				my $outqualhandle;
				my $outputfile = $taxon;
				$outputfile =~ s/^.+?__//;
				unless (open($outputhandle, ">> $outputfolder/$outputfile.fasta")) {
					&errorMessage(__LINE__, "Cannot write \"$outputfolder/$outputfile.fasta\".");
				}
				unless (flock($outputhandle, LOCK_EX)) {
					&errorMessage(__LINE__, "Cannot lock \"$outputfolder/$outputfile.fasta\".");
				}
				unless (seek($outputhandle, 0, 2)) {
					&errorMessage(__LINE__, "Cannot seek \"$outputfolder/$outputfile.fasta\".");
				}
				unless (open($outqualhandle, ">> $outputfolder/$outputfile.fasta.qual")) {
					&errorMessage(__LINE__, "Cannot write \"$outputfolder/$outputfile.fasta.qual\".");
				}
				unless (flock($outqualhandle, LOCK_EX)) {
					&errorMessage(__LINE__, "Cannot lock \"$outputfolder/$outputfile.fasta.qual\".");
				}
				unless (seek($outqualhandle, 0, 2)) {
					&errorMessage(__LINE__, "Cannot seek \"$outputfolder/$outputfile.fasta.qual\".");
				}
				print($outputhandle ">$taxon\n");
				print($outputhandle join('', @seq) . "\n");
				close($outputhandle);
				print($outqualhandle ">$taxon\n");
				print($outqualhandle join(' ', @qual) . "\n");
				close($outqualhandle);
				exit;
			}
		}
	}
}
close($inputhandle);
close($qualhandle);

# join
while (wait != -1) {
	if ($?) {
		&errorMessage(__LINE__, 'Cannot run BLAST search correctly.');
	}
}
print(STDERR "done.\n\n");

sub searchFiveprimePrimer {
	my $subject = $_[1];
	my ($newquery, $newsubject) = alignTwoSequences($_[0], $_[1]);
	my @temp = $newquery =~ /(-+)$/;
	my $overhanglength = length($temp[0]);
	my @overhang = $newsubject =~ /(.{$overhanglength})$/;
	$newquery =~ s/.{$overhanglength}$//;
	$newsubject =~ s/.{$overhanglength}$//;
	if ($newquery) {
		my $newlength = length($newquery);
		my $nmismatch = $newlength;
		for (my $i = 0; $i < $newlength; $i ++) {
			if (&testCompatibility(substr($newquery, $i, 1), substr($newsubject, $i, 1))) {
				$nmismatch --;
			}
		}
		return(($nmismatch / $newlength), $nmismatch, $overhang[0]);
	}
	else {
		return(1, length($subject), $overhang[0]);
	}
}

sub searchThreeprimePrimer {
	my $subject = $_[1];
	my ($newquery, $newsubject) = alignTwoSequences($_[0], $_[1]);
	my $subquery = $newquery;
	my $front = $subquery =~ s/^-+//;
	my $rear = $subquery =~ s/-+$//;
	my $start = rindex($newquery, $subquery);
	my $end;
	my $sublength = length($subquery);
	my $subsubject = substr($newsubject, $start, $sublength);
	my $nmismatch = $sublength;
	for (my $i = 0; $i < $sublength; $i ++) {
		if (&testCompatibility(substr($subquery, $i, 1), substr($subsubject, $i, 1))) {
			$nmismatch --;
		}
	}
	my $pmismatch = $nmismatch / $sublength;
	# debug print
	#if ((!defined($threeprimemaxnmismatch) || $nmismatch <= $threeprimemaxnmismatch) && $pmismatch <= $threeprimemaxpmismatch) {
	#	print(">query\n$newquery\n>subject\n$newsubject\n\n");
	#}
	$subsubject =~ s/-+//g;
	if (!$front) {
		$start = 0;
	}
	else {
		$start = rindex($subject, $subsubject);
	}
	if ($start == -1) {
		$end = -1;
	}
	else {
		$end = $start + length($subsubject) - 1;
	}
	return($start, $end, $pmismatch, $nmismatch);
}

sub alignTwoSequences {
	my @query = split(//, $_[0]);
	my @subject = split(//, $_[1]);
	# align sequences by Needleman-Wunsch algorithm
	{
		my $querylength = scalar(@query);
		my $subjectlength = scalar(@subject);
		# make alignment matrix, gap matrix, route matrix
		my @amatrix;
		my @rmatrix;
		$rmatrix[0][0] = 0;
		$rmatrix[0][1] = 1;
		for (my $i = 2; $i <= $querylength; $i ++) {
			$rmatrix[0][$i] = 1;
		}
		$rmatrix[1][0] = 2;
		for (my $i = 2; $i <= $subjectlength; $i ++) {
			$rmatrix[$i][0] = 2;
		}
		$amatrix[0][0] = 0;
		if ($endgap eq 'gap') {
			$amatrix[0][1] = $goscore;
			for (my $i = 2; $i <= $querylength; $i ++) {
				$amatrix[0][$i] += $amatrix[0][($i - 1)] + $gescore;
			}
			$amatrix[1][0] = $goscore;
			for (my $i = 2; $i <= $subjectlength; $i ++) {
				$amatrix[$i][0] += $amatrix[($i - 1)][0] + $gescore;
			}
		}
		elsif ($endgap eq 'mismatch') {
			$amatrix[0][1] = $mmscore;
			for (my $i = 2; $i <= $querylength; $i ++) {
				$amatrix[0][$i] += $amatrix[0][($i - 1)] + $mmscore;
			}
			$amatrix[1][0] = $mmscore;
			for (my $i = 2; $i <= $subjectlength; $i ++) {
				$amatrix[$i][0] += $amatrix[($i - 1)][0] + $mmscore;
			}
		}
		elsif ($endgap eq 'match') {
			$amatrix[0][1] = $mscore;
			for (my $i = 2; $i <= $querylength; $i ++) {
				$amatrix[0][$i] += $amatrix[0][($i - 1)] + $mscore;
			}
			$amatrix[1][0] = $mscore;
			for (my $i = 2; $i <= $subjectlength; $i ++) {
				$amatrix[$i][0] += $amatrix[($i - 1)][0] + $mscore;
			}
		}
		elsif ($endgap eq 'nobody') {
			$amatrix[0][1] = 0;
			for (my $i = 2; $i <= $querylength; $i ++) {
				$amatrix[0][$i] = 0;
			}
			$amatrix[1][0] = 0;
			for (my $i = 2; $i <= $subjectlength; $i ++) {
				$amatrix[$i][0] = 0;
			}
		}
		# fill matrix
		for (my $i = 1; $i <= $subjectlength; $i ++) {
			for (my $j = 1; $j <= $querylength; $j ++) {
				my @score;
				if (&testCompatibility($query[($j * (-1))], $subject[($i * (-1))])) {
					push(@score, $amatrix[($i - 1)][($j - 1)] + $mscore);
				}
				else {
					push(@score, $amatrix[($i - 1)][($j - 1)] + $mmscore);
				}
				if ($endgap ne 'gap' && ($i == $subjectlength || $j == $querylength)) {
					if ($endgap eq 'mismatch') {
						push(@score, $amatrix[$i][($j - 1)] + $mmscore);
						push(@score, $amatrix[($i - 1)][$j] + $mmscore);
					}
					elsif ($endgap eq 'match') {
						push(@score, $amatrix[$i][($j - 1)] + $mscore);
						push(@score, $amatrix[($i - 1)][$j] + $mscore);
					}
					elsif ($endgap eq 'nobody') {
						push(@score, $amatrix[$i][($j - 1)]);
						push(@score, $amatrix[($i - 1)][$j]);
					}
				}
				else {
					if ($rmatrix[$i][($j - 1)] == 1) {
						push(@score, $amatrix[$i][($j - 1)] + $gescore);
					}
					else {
						push(@score, $amatrix[$i][($j - 1)] + $goscore);
					}
					if ($rmatrix[($i - 1)][$j] == 2) {
						push(@score, $amatrix[($i - 1)][$j] + $gescore);
					}
					else {
						push(@score, $amatrix[($i - 1)][$j] + $goscore);
					}
				}
				if (($score[1] > $score[0] || $score[1] == $score[0] && $i == $subjectlength) && $score[1] > $score[2]) {
					$amatrix[$i][$j] = $score[1];
					$rmatrix[$i][$j] = 1;
				}
				elsif (($score[2] > $score[0] || $score[2] == $score[0] && $j == $querylength) && $score[2] >= $score[1]) {
					$amatrix[$i][$j] = $score[2];
					$rmatrix[$i][$j] = 2;
				}
				else {
					$amatrix[$i][$j] = $score[0];
					$rmatrix[$i][$j] = 0;
				}
			}
		}
		my @newquery;
		my @newsubject;
		my ($ipos, $jpos) = ($subjectlength, $querylength);
		while ($ipos != 0 && $jpos != 0) {
			if ($rmatrix[$ipos][$jpos] == 1) {
				push(@newquery, shift(@query));
				push(@newsubject, '-');
				$jpos --;
			}
			elsif ($rmatrix[$ipos][$jpos] == 2) {
				push(@newquery, '-');
				push(@newsubject, shift(@subject));
				$ipos --;
			}
			else {
				push(@newquery, shift(@query));
				push(@newsubject, shift(@subject));
				$ipos --;
				$jpos --;
			}
		}
		if (@query) {
			while (@query) {
				push(@newquery, shift(@query));
				push(@newsubject, '-');
			}
		}
		elsif (@subject) {
			while (@subject) {
				push(@newquery, '-');
				push(@newsubject, shift(@subject));
			}
		}
		return(join('', @newquery), join('', @newsubject));
	}
}

sub testCompatibility {
	# 0: incompatible
	# 1: compatible
	my ($seq1, $seq2) = @_;
	my $compatibility = 1;
	if ($seq1 ne $seq2) {
		if ($seq1 eq '-' && $seq2 ne '-' ||
			$seq1 ne '-' && $seq2 eq '-' ||
			$seq1 eq 'A' && $seq2 =~ /^[CGTUSYKB]$/ ||
			$seq1 eq 'C' && $seq2 =~ /^[AGTURWKD]$/ ||
			$seq1 eq 'G' && $seq2 =~ /^[ACTUMWYH]$/ ||
			$seq1 =~ /^[TU]$/ && $seq2 =~ /^[ACGMRSV]$/ ||
			$seq1 eq 'M' && $seq2 =~ /^[KGT]$/ ||
			$seq1 eq 'R' && $seq2 =~ /^[YCT]$/ ||
			$seq1 eq 'W' && $seq2 =~ /^[SCG]$/ ||
			$seq1 eq 'S' && $seq2 =~ /^[WAT]$/ ||
			$seq1 eq 'Y' && $seq2 =~ /^[RAG]$/ ||
			$seq1 eq 'K' && $seq2 =~ /^[MAC]$/ ||
			$seq1 eq 'B' && $seq2 eq 'A' ||
			$seq1 eq 'D' && $seq2 eq 'C' ||
			$seq1 eq 'H' && $seq2 eq 'G' ||
			$seq1 eq 'V' && $seq2 =~ /^[TU]$/) {
			$compatibility = 0;
		}
	}
	return($compatibility);
}

sub errorMessage {
	my $lineno = shift(@_);
	my $message = shift(@_);
	print(STDERR "ERROR!: line $lineno\n$message\n");
	print(STDERR "If you want to read help message, run this script without options.\n");
	exit(1);
}

sub helpMessage {
	print(STDERR <<"_END");
Usage
=====
clsplitseq options inputfile outputfolder

Command line options
====================
-k, --keyword=REGEXP(,REGEXP..)
  Specify regular expression(s) for sequence names. You can use regular
expression but you cannot use comma. All keywords will be used as AND
conditions. (default: none)

-n, --ngword=REGEXP(,REGEXP..)
  Specify regular expression(s) for sequence names. You can use regular
expression but you cannot use comma. All ngwords will be used as AND
conditions. (default: none)

-c, --converse
  If this option is specified, matched sequences to keywords will be cut off
and unmatched sequences to keywords will be saved. (default: off)

-p, --primerfile=FILENAME
  Specify primer list file name. (default: none)

--maxpmismatch=DECIMAL
  Specify maximum acceptable mismatch percentage for primers. (default: 0.14)

--maxnmismatch=INTEGER
  Specify maximum acceptable mismatch number for primers.
(default: Inf)

--3primeprimerfile=FILENAME
  Specify 3' primer list file name. (default: none)

--3primemaxpmismatch=DECIMAL
  Specify maximum acceptable mismatch percentage for 3' primers.
(default: 0.15)

--3primemaxnmismatch=INTEGER
  Specify maximum acceptable mismatch number for 3' primers.
(default: Inf)

--need3primeprimer
  If this option is specified, unmatched sequence to 3' primer will not be
output. (default: off)

-t, --tagfile=FILENAME
  Specify tag list file name. (default: none)

-q, --qualfile=FILENAME
  Specify .qual file name. (default: inputfile.qual)

-a, --append
  Specify outputfile append or not. (default: off)

--minlen=INTEGER
  Specify minimum length threshold. (default: 1)

--maxlen=INTEGER
  Specify maximum length threshold. (default: Inf)

--minqual=INTEGER
  Specify minimum quality threshold. (default: none)

--minqualtag=INTEGER
  Specify minimum quality threshold for tag. (default: minqual)

--replaceinternal
  Specify whether internal low-quality characters replace to missing
data (?) or not. (default: off)

--runname=RUNNAME
  Specify run name for replacing run name.
(default: inputfile name - extension)

--gapopenscore=INTEGER
  Specify gap open score for alignment of primers. (default: -10)

--gapextensionscore=INTEGER
  Specify gap extension score for alignment of primers. (default: -1)

--mismatchscore=INTEGER
  Specify mismatch score for alignment of primers. (default: -4)

--matchscore=INTEGER
  Specify match score for alignment of primers. (default: 5)

--endgap=NOBODY|MATCH|MISMATCH|GAP
  Specify end gap treatment. (default: nobody)

-n, --numthreads=INTEGER
  Specify the number of processes. (default: 1)

Acceptable input file formats
=============================
FASTA (+.qual)
_END
	exit;
}

#--nodelprimer
#--nodeltag
