#!/bin/sh
#\
exec tclsh "$0" "$@"

# Min. Hairpin
global MIN_HP_SIZE
set    MIN_HP_SIZE 3

global opts
set opts(debug)   0




###   pdebug   ################################################################
#
# print debugging msg if global opts(debug) is true
#
proc pdebug {msg} {
    #############
    global opts
    
    if {!$opts(debug)} {
        return
    }
    if {[catch {lindex [info level -1] 0} caller]} {
        set caller ::
    }
    puts stdout "DEBUG($caller): $msg"
    flush stdout
}
# pdebug


###   pwarn   ################################################################
#
# print debugging msg if global opts(debug) is true
#
proc pwarn {msg} {
    ############
    if {[catch {lindex [info level -1] 0} caller]} {
        set caller ::
    }
    puts stdout "WARNING($caller): $msg"
    flush stdout
}
# pwarn



###   perror   ################################################################
#
#
#
proc perror {msg} {
    #############
    if {[catch {lindex [info level -1] 0} caller]} {
        set caller ::
    }
    puts stderr "ERROR($caller): $msg"
    flush stderr
}    
# end perror




###   PrintMatrix   ##########################################################
#
#
proc PrintMatrix {matname seq} {
    ##########################
    upvar $matname mat
    
    set seqlen [string length $seq]

    
    ### print sequence and indices in first row
    #
    puts -nonewline [format  "%6s" " "]
    for {set i 0} {$i<=$seqlen} {incr i} {
        puts -nonewline [format  "%3s" [string index $seq $i ]]
    }
    puts -nonewline [format  "\n%6s" " "]
    for {set i 1} {$i<=$seqlen} {incr i} {
        puts -nonewline [format  "%3s" $i]
    }
    puts -nonewline "\n"    
    
    
    ### print matrix
    #
    for {set i 1} {$i<=$seqlen} {incr i} {
    
        # print nt and index
        #
        if {$i==0} {
            set nt " "
        } else {
            set nt [string index $seq [expr {$i-1}]]
        }
        puts -nonewline [format  "%3s%3d" $nt $i]
    
        # print matrix values
        #
        for {set j 1} {$j<=$seqlen} {incr j} {
            if {$i==$j} {
                set value "*"
            } elseif {$i<$j} {
                set value $mat($i,$j)
            } else {
                set value "-"
            }
            puts -nonewline [format  "%3s" $value]
        }
        puts -nonewline "\n"
    }
}
# PrintMatrix




###   Tinoco   ################################################################
#
#
proc Tinoco {matname seq} {
    #####################
    global MIN_HP_SIZE
    upvar  $matname mat
    
    set seqlen [string length $seq]
    set bp_list [list au ua gc cg gu ug]
    
    for {set i 1} {$i<=$seqlen} {incr i} {
        set nti [string index $seq [expr {$i-1}]]

        for {set j [expr {$i+1}]} {$j<=$seqlen} {incr j} {
            #for {set j [expr {$i+$MIN_HP_SIZE+1}]} {$j<=$seqlen} {incr j}; faster :)
            set ntj [string index $seq [expr {$j-1}]]
            set mat($i,$j) 0
            foreach refbp $bp_list {
                set checkbp "${nti}${ntj}"
                if {$checkbp==$refbp} {
                    set mat($i,$j) 1
                }
            }
        }
    }
}
# Tinoco




###   OptmatInit   ###########################################################
#
#
proc OptmatInit {matname dim} {
    ##############################
    global MIN_HP_SIZE
    upvar $matname mat
    
    for {set i 1} {$i<=$dim} {incr i} {
        for {set j [expr {$i-1}]} {$j<=[expr {$i+$MIN_HP_SIZE}]} {incr j} {
            set mat($i,$j) 0
            pdebug "$i,$j"
        }
    }
}
# OptmatInit



 
###   Bpmax   #################################################################
#
#
#
proc Bpmax {optmatname dpname seqlen} {
    #################################
    global MIN_HP_SIZE
    upvar  $optmatname opt
    upvar  $dpname     dp 

    for {set j 1} {$j<=$seqlen} {incr j} {
        for {set i 1} {$i<[expr {$j-$MIN_HP_SIZE}]} {incr i} {

            # search substructures
            set submax 0
            for {set k $i} {$k<[expr {$j-$MIN_HP_SIZE}]} {incr k} {
                if {$dp($k,$j) > 0} {
                    set dummy [expr {$opt($i,[expr {$k-1}])                \
                                      + $opt([expr {$k+1}],[expr {$j-1}]) \
                                      + $dp($k,$j)}]
                    set submax [expr {($submax>$dummy)?$submax:$dummy}]
                }
            }
            
            set dummy $opt($i,[expr {$j-1}])
            set opt($i,$j) [expr {($dummy>$submax)?$dummy:$submax}]
        }    
    }
}
# Bpmax




###   Backtrack   #############################################################
#
# eddy's version using a stack
#
# basepair partner indices are writte to bpsname(this_index)
#
proc Backtrack {optmatname dpname seqlen bpsname} {
    #############################################
    upvar  $optmatname opt
    upvar  $dpname     dp 
    upvar  $bpsname    bps
    
    array set bps {}
    
    for {set i 1} {$i<=$seqlen} {incr i} {
        set bps($i) 0
    }
    
    # stack init
    set stack {}
    lappend stack [list 1 $seqlen]
    
    while {[llength $stack]!=0} {
        set possbp [lindex $stack end]
        set stack  [lreplace $stack end end]
        
        set i [lindex $possbp 0]
        set j [lindex $possbp 1]
    
        pdebug "pos $i:$j"
        #puts "opt([expr {$i+1}],[expr {$j-1}]) = $opt([expr {$i+1}],[expr {$j-1}])"
        #puts "dp($i,$j)  = $dp($i,$j)"
        # puts "opt($i,$j) = $opt($i,$j)"
        if {$i>=$j} {
            continue

        # down
        } elseif {$opt([expr {$i+1}],$j) == $opt($i,$j)} {
            pdebug "down"
            lappend stack [list [expr {$i+1}] $j]    
            
        # left 
        } elseif {$opt($i,[expr {$j-1}]) == $opt($i,$j)} {
            pdebug "left"
            lappend stack [list $i [expr {$j-1}]] 

        # downleft -> basepair
        } elseif {[expr {$opt([expr {$i+1}],[expr {$j-1}]) + $dp($i,$j)}] == $opt($i,$j)} {
            set bps($i) $j
            set bps($j) $i
            pdebug "bp $i:$j"
            lappend stack [list [expr {$i+1}] [expr {$j-1}]]
        
        # bifurcation
        } else {
            pdebug "bifurcation"
            for {set k [expr {$i+1}]} {$k<[expr {$j-1}]} {incr k} {
                if {[expr {$opt($i,$k)+$opt([expr {$k+1}],$j)}] == $opt($i,$j)} {
                    lappend stack [list [expr {$k+1}] $j]
                    lappend stack [list $i $k]
                    set k $j ;# break
                }
            }
        }
        pdebug "stacksize [llength $stack]"
    }
}
# Backtrack


###   PrettyPrintBps   ########################################################
#
#
#
proc PrettyPrintBps {bpname seq} {
    ############################
    upvar $bpname bp
    
    set seqlen [string length $seq]

    set format "\%4s"
    
    # print indices
    #
    puts -nonewline "No:  "
    for {set i 1} {$i<=$seqlen} {incr i} {
        puts -nonewline [format  $format $i]
    }
    puts -nonewline "\n"
    
    # print sequence
    #
    puts -nonewline "Nt:  "
    for {set i 1} {$i<=$seqlen} {incr i} {
        puts -nonewline [format $format [string index $seq [expr {$i-1}]]]
    }
    puts -nonewline "\n"
    
    # print basepairs
    #
    puts -nonewline  "Bp:  "
    for {set i 1} {$i<=$seqlen} {incr i} {
        puts -nonewline [format $format  $bp($i)]
    }
    puts -nonewline  "\n"
    
    # print dotbracket
    #
    puts -nonewline  "DB:  "
    for {set i 1} {$i<=$seqlen} {incr i} {
        if {$bp($i)==0} {
            set nt "."
        } elseif ($bp($i)>$i) {
            set nt "("
        } else {
            set nt ")"
        }
        puts -nonewline [format  $format $nt]
    }
    puts -nonewline  "\n"
}
#  PrettyPrintBps




###   Usage   #################################################################
#
#
proc Usage {} {
    #########
    set myname [file tail [info script $::argv0]]
    puts "usage: $myname <rna-sequence>"
}
# Usage




if {$argc!=1} {
    Usage
    exit
}

set rawseq [string tolower [lindex $argv 0]]
regsub -all {[^acgu]} $rawseq "" seq
if {$rawseq!=$seq} {
    pwarn "your sequence was parsed as $seq"
}



set seq_len [string length $seq]

array set dotplot {}
array set optmat  {}

Tinoco      dotplot $seq
OptmatInit  optmat  $seq_len

PrintMatrix dotplot $seq

Bpmax optmat dotplot $seq_len

PrintMatrix optmat $seq

Backtrack optmat dotplot $seq_len optbps

PrettyPrintBps optbps $seq

exit



################################################################################
################################################################################
################################################################################

optimal.tcl caaaacccucuaacccuuuucccaauu; tRNA mit bulge unten
  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 
  c  a  a  a  a  c  c  c  u  c  u  a  a  c  c  c  u  u  u  u  c  c  c  a  a  u  u 
  .  (  (  (  (  .  .  .  )  .  )  (  (  .  .  .  )  )  (  (  .  .  .  )  )  )  ) 

optimal.tcl caaacacccucuaacccuuuucccaauu; tRNA mit internem loop
  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 
  c  a  a  a  c  a  c  c  c  u  c  u  a  a  c  c  c  u  u  u  u  c  c  c  a  a  u  u 
  .  (  (  (  .  (  .  .  .  )  .  )  (  (  .  .  .  )  )  (  (  .  .  .  )  )  )  ) 

optimal.tcl caaacacccuuaacccuuuucccaauu; tRNA mit bulge oben
  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 
  c  a  a  a  c  a  c  c  c  u  u  a  a  c  c  c  u  u  u  u  c  c  c  a  a  u  u 
  .  (  (  (  .  (  .  .  .  )  )  (  (  .  .  .  )  )  (  (  .  .  .  )  )  )  )

optimal.tcl caacacacccuucaacccuucuucccaacuu; tRNA mit bulge oben + freien nts in Verzweigung
  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
  c  a  a  c  a  c  a  c  c  c  u  u  c  a  a  c  c  c  u  u  c  u  u  c  c  c  a  a  c  u  u 
  .  (  (  .  (  .  (  .  .  .  )  )  .  (  (  .  .  .  )  )  .  (  (  .  .  .  )  )  .  )  )