#
# This is a constraints file for the transition matrix file
# trans_shadow_partial.pbl.
#
# This configuration file can be used when optimizing the transition matrix parameters
# for a new species or setting. It is an argument to
# optimize_augustus.pl when run in the transition matrix optimization
# mode, e.g. 
#
# optimize_augustus.pl --species=myspecies
# --opt_trans_matrix=/path/augustus/config/species/myspecies/myspecies_trans_shadow_partial.pbl
# --matrix_constraints=constraints_shadow_partial.txt train.gb
#
# Mario Stanke (9.12.2007)
#
#
# --------------------------------------------------------------------
# This is a list of the states that optimize_augustus.pl should try to
# optimize. For the state numbers see the corresponding states file
# 'states_shadow.cfg'.
# States not in this list are simply skipped by optimize_augustus.pl.
# Use a list with one state number per line or use the keyword 'all'.
# The order determines the order in which the optimization cycles
# through the states.
#
[TRY]
# One state number per line, visited in the order listed; the text after '#'
# is the state's name. ('ass' presumably stands for acceptor splice site --
# TODO confirm against states_shadow.cfg.)
0  # intergenic region
13 # ass 0
18 # ass 1
23 # ass 2
# the following states tune the overall frequency of exons (if not normed)
# NOTE: states 2 and 5 are not listed in the [NORMED] section of this file.
2  # initial exon 0
5  # internal exon 0
# --------------------------------------------------------------------
# This is a list of states s, such that the transition probabilities
# out of s are normed:
#
# M[s][0] + M[s][1] + ... + M[s][71] = const. 
#
# The constant is computed from the original
# trans_shadow_partial.pbl file. It is 1.0 with few exceptions.
# For unnormed states the M[s][.] values don't form a probability
# distribution anymore. If you don't care -- AUGUSTUS doesn't.
# Use a list with one state number per line or use the keyword 'all'.
#
[NORMED]
# These are the first four states of the [TRY] list, where they are labelled
# 0 = intergenic region and 13 / 18 / 23 = ass 0 / 1 / 2. The exon states
# 2 and 5 from [TRY] are not listed here, i.e. they are left unnormed.
0
13
18
23
# --------------------------------------------------------------------
# This section is for setting constraints between transition probabilities,
# such as those suggested by symmetry, e.g. strand symmetry (or by treating
# transitions in all reading frames the same). Theoretically, i.e. with infinite and
# representative training data, this should not be necessary. However, in the real finite
# world this is a little safeguard against overfitting.
#
[BINDINGS]
# Each plain binding has the form (s,t)=(u,v) and ties two transition-matrix
# entries together. Presumably (s,t) denotes M[s][t], the transition from
# state s to state t -- TODO confirm against optimize_augustus.pl.
(0,1)=(0,24)                    # same frequency of single exon genes on both strands
(0,2)=(0,31)                    # same frequency of initial exons with len=0 (mod 3)
(0,3)=(0,30)                    # same frequency of initial exons with len=1 (mod 3)
(0,4)=(0,29)                    # same frequency of initial exons with len=2 (mod 3)
(13,8)=(18,8)                   # - Prob. of terminating
(18,8)=(23,8)                   # CDS is independent
(13,8)=(23,8)                   # of reading frame.
(2,10)=(3,15)                   # - equal exit
(3,15)=(4,20)                   # probs out of initial
(2,10)=(4,20)                   # exons for the three frames
(5,10)=(6,15)                   # - equal exit
(5,10)=(7,20)                   # probs out of internal exons for the three frames
(5,10)=(26,36)                  # and
(5,10)=(27,41)                  # for
(5,10)=(28,46)                  # both strands
# The Markov chain of phases of successive introns should
# be the time-reversed chain for the reverse strand.
# NOTE(review): the line below contains 14 '(' but only 13 ')', so the
# 'reverse(' call is never closed. Check how optimize_augustus.pl parses this
# line before adding a closing parenthesis -- it may match this exact form.
MC(((43,28),(43,26),(43,27)),((33,28),(33,26),(33,27)),((38,28),(38,26),(38,27)))=reverse(MC(((13,5),(13,6),(13,7)),((18,5),(18,6),(18,7)),((23,5),(23,6),(23,7)))