exec tclsh "$0" "$@"
global MIN_HP_SIZE
set MIN_HP_SIZE 3
global opts
set opts(debug) 0
proc pdebug {msg} {
global opts
if {!$opts(debug)} {
return
}
if {[catch {lindex [info level -1] 0} caller]} {
set caller ::
}
puts stdout "DEBUG($caller): $msg"
flush stdout
}
proc pwarn {msg} {
if {[catch {lindex [info level -1] 0} caller]} {
set caller ::
}
puts stdout "WARNING($caller): $msg"
flush stdout
}
proc perror {msg} {
if {[catch {lindex [info level -1] 0} caller]} {
set caller ::
}
puts stderr "ERROR($caller): $msg"
flush stderr
}
proc PrintMatrix {matname seq} {
upvar $matname mat
set seqlen [string length $seq]
puts -nonewline [format "%6s" " "]
for {set i 0} {$i<=$seqlen} {incr i} {
puts -nonewline [format "%3s" [string index $seq $i ]]
}
puts -nonewline [format "\n%6s" " "]
for {set i 1} {$i<=$seqlen} {incr i} {
puts -nonewline [format "%3s" $i]
}
puts -nonewline "\n"
for {set i 1} {$i<=$seqlen} {incr i} {
if {$i==0} {
set nt " "
} else {
set nt [string index $seq [expr {$i-1}]]
}
puts -nonewline [format "%3s%3d" $nt $i]
for {set j 1} {$j<=$seqlen} {incr j} {
if {$i==$j} {
set value "*"
} elseif {$i<$j} {
set value $mat($i,$j)
} else {
set value "-"
}
puts -nonewline [format "%3s" $value]
}
puts -nonewline "\n"
}
}
proc Tinoco {matname seq} {
global MIN_HP_SIZE
upvar $matname mat
set seqlen [string length $seq]
set bp_list [list au ua gc cg gu ug]
for {set i 1} {$i<=$seqlen} {incr i} {
set nti [string index $seq [expr {$i-1}]]
for {set j [expr {$i+1}]} {$j<=$seqlen} {incr j} {
set ntj [string index $seq [expr {$j-1}]]
set mat($i,$j) 0
foreach refbp $bp_list {
set checkbp "${nti}${ntj}"
if {$checkbp==$refbp} {
set mat($i,$j) 1
}
}
}
}
}
proc OptmatInit {matname dim} {
global MIN_HP_SIZE
upvar $matname mat
for {set i 1} {$i<=$dim} {incr i} {
for {set j [expr {$i-1}]} {$j<=[expr {$i+$MIN_HP_SIZE}]} {incr j} {
set mat($i,$j) 0
pdebug "$i,$j"
}
}
}
proc Bpmax {optmatname dpname seqlen} {
global MIN_HP_SIZE
upvar $optmatname opt
upvar $dpname dp
for {set j 1} {$j<=$seqlen} {incr j} {
for {set i 1} {$i<[expr {$j-$MIN_HP_SIZE}]} {incr i} {
set submax 0
for {set k $i} {$k<[expr {$j-$MIN_HP_SIZE}]} {incr k} {
if {$dp($k,$j) > 0} {
set dummy [expr {$opt($i,[expr {$k-1}]) \
+ $opt([expr {$k+1}],[expr {$j-1}]) \
+ $dp($k,$j)}]
set submax [expr {($submax>$dummy)?$submax:$dummy}]
}
}
set dummy $opt($i,[expr {$j-1}])
set opt($i,$j) [expr {($dummy>$submax)?$dummy:$submax}]
}
}
}
proc Backtrack {optmatname dpname seqlen bpsname} {
upvar $optmatname opt
upvar $dpname dp
upvar $bpsname bps
array set bps {}
for {set i 1} {$i<=$seqlen} {incr i} {
set bps($i) 0
}
set stack {}
lappend stack [list 1 $seqlen]
while {[llength $stack]!=0} {
set possbp [lindex $stack end]
set stack [lreplace $stack end end]
set i [lindex $possbp 0]
set j [lindex $possbp 1]
pdebug "pos $i:$j"
if {$i>=$j} {
continue
} elseif {$opt([expr {$i+1}],$j) == $opt($i,$j)} {
pdebug "down"
lappend stack [list [expr {$i+1}] $j]
} elseif {$opt($i,[expr {$j-1}]) == $opt($i,$j)} {
pdebug "left"
lappend stack [list $i [expr {$j-1}]]
} elseif {[expr {$opt([expr {$i+1}],[expr {$j-1}]) + $dp($i,$j)}] == $opt($i,$j)} {
set bps($i) $j
set bps($j) $i
pdebug "bp $i:$j"
lappend stack [list [expr {$i+1}] [expr {$j-1}]]
} else {
pdebug "bifurcation"
for {set k [expr {$i+1}]} {$k<[expr {$j-1}]} {incr k} {
if {[expr {$opt($i,$k)+$opt([expr {$k+1}],$j)}] == $opt($i,$j)} {
lappend stack [list [expr {$k+1}] $j]
lappend stack [list $i $k]
set k $j ; }
}
}
pdebug "stacksize [llength $stack]"
}
}
proc PrettyPrintBps {bpname seq} {
upvar $bpname bp
set seqlen [string length $seq]
set format "\%4s"
puts -nonewline "No: "
for {set i 1} {$i<=$seqlen} {incr i} {
puts -nonewline [format $format $i]
}
puts -nonewline "\n"
puts -nonewline "Nt: "
for {set i 1} {$i<=$seqlen} {incr i} {
puts -nonewline [format $format [string index $seq [expr {$i-1}]]]
}
puts -nonewline "\n"
puts -nonewline "Bp: "
for {set i 1} {$i<=$seqlen} {incr i} {
puts -nonewline [format $format $bp($i)]
}
puts -nonewline "\n"
puts -nonewline "DB: "
for {set i 1} {$i<=$seqlen} {incr i} {
if {$bp($i)==0} {
set nt "."
} elseif ($bp($i)>$i) {
set nt "("
} else {
set nt ")"
}
puts -nonewline [format $format $nt]
}
puts -nonewline "\n"
}
proc Usage {} {
set myname [file tail [info script $::argv0]]
puts "usage: $myname <rna-sequence>"
}
if {$argc!=1} {
Usage
exit
}
set rawseq [string tolower [lindex $argv 0]]
regsub -all {[^acgu]} $rawseq "" seq
if {$rawseq!=$seq} {
pwarn "your sequence was parsed as $seq"
}
set seq_len [string length $seq]
array set dotplot {}
array set optmat {}
Tinoco dotplot $seq
OptmatInit optmat $seq_len
PrintMatrix dotplot $seq
Bpmax optmat dotplot $seq_len
PrintMatrix optmat $seq
Backtrack optmat dotplot $seq_len optbps
PrettyPrintBps optbps $seq
exit
optimal.tcl caaaacccucuaacccuuuucccaauu; tRNA mit bulge unten
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
c a a a a c c c u c u a a c c c u u u u c c c a a u u
. ( ( ( ( . . . ) . ) ( ( . . . ) ) ( ( . . . ) ) ) )
optimal.tcl caaacacccucuaacccuuuucccaauu; tRNA mit internem loop
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
c a a a c a c c c u c u a a c c c u u u u c c c a a u u
. ( ( ( . ( . . . ) . ) ( ( . . . ) ) ( ( . . . ) ) ) )
optimal.tcl caaacacccuuaacccuuuucccaauu; tRNA mit bulge oben
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
c a a a c a c c c u u a a c c c u u u u c c c a a u u
. ( ( ( . ( . . . ) ) ( ( . . . ) ) ( ( . . . ) ) ) )
optimal.tcl caacacacccuucaacccuucuucccaacuu; tRNA mit bulge oben + freien nts in Verzweigung
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
c a a c a c a c c c u u c a a c c c u u c u u c c c a a c u u
. ( ( . ( . ( . . . ) ) . ( ( . . . ) ) . ( ( . . . ) ) . ) )