use strict;
use Getopt::Std;
my $DEBUG; my $MAX_NUM_BIF=100;
sub print_matrix($@) {
my ($seq, @mat) = @_;
my ($seqlen) = length($seq);
my ($nt, $i, $j, $value);
printf "%3s%3s", " ";
for ($i=0; $i<$seqlen; $i++) {
printf "%3s", substr($seq, $i, 1);
}
printf "\n%3s%3s", " ";
for ($i=1; $i<=$seqlen; $i++) {
printf "%3s", $i;
}
print "\n";
for ($i=1; $i<=$seqlen; $i++) {
if ($i eq 0) {
$nt = " ";
} else {
$nt = substr($seq, $i-1, 1);
}
printf "%3s%3d", $nt, $i;
for ($j=1; $j<=$seqlen; $j++) {
if ($i eq $j) {
$value= "*";
} else {
$value = $mat[$i][$j];
}
printf "%3s", $value;
}
print "\n";
}
}
sub print_basepairs($@) {
my ($seq, @bp) = @_;
my $seqlen = length($seq);
my ($format) = "\%3s";
my ($i, $j) = $seqlen, 1;
while ( $i>1 ) {
$j++;
$i /= 10;
}
$j += 2;
substr($format, 1, 1)=$j;
print "No: ";
for (my $i = 1; $i<=$seqlen; $i++) {
printf $format, $i;
}
print "\n";
print "Nt: ";
for ($i = 1; $i<=$seqlen; $i++) {
printf $format, substr($seq, $i-1, 1);
}
print "\n";
print "Bp: ";
for ($i = 1; $i<=$seqlen; $i++) {
printf $format, $bp[$i];
}
print "\n";
print "DB: ";
for ($i = 1; $i<=$seqlen; $i++) {
if ($bp[$i] eq 0) {
@_=".";
} elsif ($bp[$i]>$i) {
@_="(";
} else {
@_=")";
}
printf $format, @_;
}
print "\n";
}
sub max (@) {
my ($max) = shift(@_);
my $foo;
foreach $foo (@_) {
$max = $foo if $max < $foo;
}
return $max;
}
sub optimize ($$$) {
my ($seqlen, $area_ref, $mms_ref) = @_;
my $dummy;
for (my $j=1; $j<=$seqlen; $j++) {
for (my $i=1; $i<$j-3; $i++) {
$dummy=0;
for (my $k=$i; $k<$j-3; $k++) {
if ($area_ref->[$k][$j] > 0) {
$dummy = max($dummy,
$mms_ref->[$i][$k-1] +
$mms_ref->[$k+1][$j-1] +
$area_ref->[$k][$j]);
}
}
$mms_ref->[$i][$j] = max($mms_ref->[$i][$j-1], $dummy);
if ($DEBUG) {
print "optimize: setting mms->[$i][$j] to $mms_ref->[$i][$j-1]\n"
}
}
}
}
sub basepair($$) {
my ($nt1, $nt2) = @_;
my @valid_bp = ("gc", "au", "gu");
my $my_bp = "$nt1$nt2";
if ($nt1 eq "n" || $nt2 eq "n") {
return 1;
}
foreach my $bp (@valid_bp) {
if ($my_bp eq $bp || $my_bp eq reverse($bp)) {
if ($DEBUG) {print "basepair: $nt1:$nt2 is a basepair\n"};
return 1;
}
}
return 0;
}
sub create_dotplot($$) {
my ($seq, $mat_ref) = @_;
my ($seqlen) = length($seq);
my ($nt_i, $nt_j);
$mat_ref->[0][0]="0"; for (my $i=1; $i<=$seqlen; $i++) { $nt_i = substr($seq, $i-1, 1);
$mat_ref->[$i][$i]=0;
for (my $j=$i+3; $j<=$seqlen; $j++) { $nt_j = substr($seq, $j-1, 1);
$mat_ref->[$i][$j] = basepair($nt_i, $nt_j);
if ($DEBUG) {
print "create_dotplot: matrix[$i=$nt_i][$j=$nt_j]",
$mat_ref->[$i][$j], "\n";
}
}
}
}
sub matrix_init($$) {
my ($dim, $init) = @_;
my @dummy;
for (my $i=0; $i<=$dim; $i++) {
for (my $j=0; $j<=$dim; $j++) {
$dummy[$i][$j]=$init;
}
}
return @dummy;
}
sub backtrack_eddy($$$) {
my ($seqlen, $area_ref, $mms_ref) = @_;
my (@stack);
my ($i, $j, $k);
my @basepairs;
for ($i=0; $i<$seqlen; $i++) {
$basepairs[$i+1] = "0";
}
push(@stack, 1);
push(@stack, $seqlen);
while ($#stack != -1) {
$j=pop(@stack);
$i=pop(@stack);
if ($i>=$j) {
next;
} elsif ($mms_ref->[$i+1][$j] eq $mms_ref->[$i][$j]) {
push(@stack, $i+1);
push(@stack, $j);
} elsif ($mms_ref->[$i][$j-1] eq $mms_ref->[$i][$j]) {
push(@stack, $i);
push(@stack, $j-1);
} elsif ( ($mms_ref->[$i+1][$j-1] + $area_ref->[$i][$j]) eq $mms_ref->[$i][$j]) {
if ($DEBUG) {
print "eddy: basepair: $i:$j\n";
}
$basepairs[$i]=$j;
$basepairs[$j]=$i;
push(@stack, $i+1);
push(@stack, $j-1);
} else {
for ($k=$i+1; $k<=$j-1; $k++) {
if ( ($mms_ref->[$i][$k] + $mms_ref->[$k+1][$j]) eq $mms_ref->[$i][$j]) {
push(@stack, $k+1);
push(@stack, $j);
push(@stack, $i);
push(@stack, $k);
$k=$j; }
}
}
}
return @basepairs;
}
sub backtrack_steger($$$$) {
my ($area_ref, $mms_ref, $i1, $j1) = @_;
my ($i, $j, $k);
my (@ipos, @jpos);
my (@basepairs);
for ($i=0; $i<$j1; $i++) {
$basepairs[$i+1] = "0";
}
my $pos = 1;
$ipos[$pos-1] = $i1;
$jpos[$pos-1] = $j1;
while ($pos > 0) {
$i = $ipos[$pos-1];
$j = $jpos[$pos-1];
$pos -= 1;
while ($i < $j-3) {
if ($mms_ref->[$i][$j] == $mms_ref->[$i][$j-1]) {
$j -= 1;
} else {
for ($k=$i; $k<$j-3; $k++) {
if ($mms_ref->[$i][$j] == ( $mms_ref->[$i][$k-1] +
$mms_ref->[$k+1][$j-1] +
$area_ref->[$k][$j]) ) {
if ($pos eq $MAX_NUM_BIF) {
print "ERROR: (backtrack)\nToo many bifurcations!\n";
return;
}
$pos++;
$ipos[$pos-1] = $k+1;
$jpos[$pos-1] = $j-1;
if ($area_ref->[$k][$j] > 0) {
$basepairs[$k] = $j;
$basepairs[$j] = $k;
}
$j = $k-1;
}
}
}
}
}
return @basepairs;
}
sub main($$) {
my ($seq, $backtrack) = @_;
my (@area, @mms, @basepairs);
my $seqlen = length($seq);
@area = matrix_init($seqlen, "-");
@mms = matrix_init($seqlen, 0);
print "\nAREA AFTER INIT :\n";
print_matrix($seq, @area);
create_dotplot($seq, \@area);
print "\nAREA AFTER DOTPLOT :\n";
print_matrix($seq, @area);
optimize($seqlen, \@area, \@mms);
print "\nMMS AFTER OPTIMZE :\n";
print_matrix($seq, @mms);
if ($backtrack eq "steger") {
@basepairs = backtrack_steger(\@area, \@mms, 1, $seqlen);
} elsif ($backtrack eq "eddy") {
@basepairs = backtrack_eddy($seqlen, \@area, \@mms);
} else {
print "ERROR: BACKTRACK METHOD UNKNOWN\n";
exit
}
print "\nBACKTRACK RESULTS :\n";
print_basepairs($seq, @basepairs);
}
my %OPTS;
getopts('dhs:b:', \%OPTS);
if ($OPTS{h}) {
print "Available Options:\n";
print "\t[-d] : print some debugging info\n";
print "\t -s <sequence> : the sequence\n";
print "\t valid chars are \"acgun\"\n";
print "\t -b <steger|eddy> : 0: Steger's Backtrack (default)\n";
print "\t 1: Eddy's Backtrack\n";
print "\t -h : print this help\n";
exit 0;
}
if ($OPTS{d}) {
$DEBUG=1;
}
if (!$OPTS{b}) {
$OPTS{b}="steger";
}
if (!$OPTS{s}) {
print "No sequence given !\n";
exit -1;
}
my $SEQ = lc($OPTS{s});
$SEQ =~ s/[^acgun]//;
if (length($OPTS{s}) ne length($SEQ)) {
print "Your Sequence \"$OPTS{s}\" was converted to \"$SEQ\"\n";
}
main($SEQ, $OPTS{b});