# first, we need to define our subroutines (functions) # which will be used in our program. # these all begin with the keyword "sub" # function to read in a file given its filename sub readFile { my ($filename) = @_; my $seq = ""; open(FILE, $filename); while () { chomp($_); if (substr($_, 0, 1) ne ">") { $seq .= $_; } } return $seq; } # function to return the maximum value of three numbers # as well as an identifier indicating which value was the maximum. # This corresponds to: upArrow = 1, leftArrow = 2, upper-leftArrow = 3 sub getMax { my ($s1, $s2, $s3) = @_; my $maxValue = $s1; my $maxNum = 1; # value of Arrow if ($maxValue < $s2) { $maxValue = $s2; $maxNum = 2; } if ($maxValue < $s3) { $maxValue = $s3; $maxNum = 3; } return ($maxValue, $maxNum); } # a utility function to print the score matrix sub printScores { my ($v, $w, @scores) = @_; my $n = length $v; my $m = length $w; print "*$w\n"; for ($i=1; $i<=$n; $i++) { print substr($v,$i-1,1); for ($j=1; $j<=$m; $j++) { print $scores[$i][$j]; } print "\n"; } } # the alignment function sub LCS { my ($v, $w) = @_; #initialize the matrices my @scores = (); my @arrows = (); my $n = length $v; my $m = length $w; for ($i=0; $i <= $n; $i++) { my @newRow = (); for ($j=0; $j <= $m; $j++) { push(@newRow, 0); } push(@scores, [@newRow]); } for ($i=1; $i <= $n; $i++) { my $vi = substr($v, $i-1, 1); for ($j=1; $j <= $m; $j++) { my $wj = substr($w, $j-1, 1); my $s1 = $scores[$i-1][$j]; my $s2 = $scores[$i][$j-1]; if ($vi eq $wj) { $s3 = $scores[$i-1][$j-1] + 1; } else { $s3 = -1; } ($scores[$i][$j], $arrows[$i][$j]) = getMax($s1, $s2, $s3); } } #printScores($v, $w, @scores); return ($scores[$n][$m], @arrows); } # a utility function to print the arrow matrix sub printArrows { my ($v, $w, @arrows) = @_; my $n = length $v; my $m = length $w; for ($i=0; $i<=$n; $i++) { for ($j=0; $j<$m; $j++) { print $arrows[$i][$j]; } print "\n"; } } # a utility function to print the common subsequence sub printLCS { my ($v, $i, $j, $seqnum, @arrows) = @_; if ($i == 0 or $j == 0) { return; } if ($arrows[$i][$j] == 3) { printLCS($v, $i-1, $j-1, $seqnum, @arrows); print substr($v, $i-1, 1); } else { if ($arrows[$i][$j] == 1) { printLCS($v, $i-1, $j, $seqnum, @arrows); } else { printLCS($v, $i, $j-1, $seqnum, @arrows); } } } # we are now finished with our subroutines. # the main part of the program starts here! # first, read in the two filenames from the runtime arguments my $seq1 = readFile($ARGV[0]); my $seq2 = readFile($ARGV[1]); print "Read two sequences:\n"; print "$seq1\n"; print "$seq2\n"; print "\nTheir longest common subsequence is as follows:\n"; # calculate the longest common subsequence ($topScore, @arrows) = LCS($seq1, $seq2); #printArrows($seq1, $seq2, @arrows); my $n = length($seq1); my $m = length($seq2); # print the longest common subsequence printLCS($seq1, $n, $m, 1, @arrows);