# $Id$ package Bio::Tools::Run::MrBayes; use IPC::Open2; use strict; use warnings; use Bio::Root::Root; use Bio::Phylo::IO qw(parse); # Alarm, not part of BioPerl use Bio::Factory::ApplicationFactoryI; use Bio::Tools::Run::WrapperBase; use vars qw(@ISA); @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); our $VERSION = 0.1; =head2 new Title : new Usage : my $obj = new Bio::Tools::Run::MrBayes(); Function: Builds a new Bio::Tools::Run::MrBayes object Returns : Bio::Tools::Run::MrBayes Args : -save_tempfiles => boolean to save the generated tempfiles and NOT cleanup after onesself (default FALSE) -executable => where the mrbayes executable resides =cut sub new { my ( $class, @args ) = @_; my $self = $class->SUPER::new( @args ); # random seed for mrbayes my $seed = time, # attaches to value of $self->{$command}->{_string} for mrbayes-syntax # stringification my $closure = sub { return $_[0].'='.$_[1].'('.join(',',@{$_[2]}).')' if @{$_[2]} }; # MrBayes commands mirrored by Bio::Tools::Run::MrBayes object $self->{'commands'} = [ 'set', 'pairs', 'partition', 'usertree', 'outgroup', 'constraint', 'ctype', 'link', 'unlink', 'lset', 'prset', 'mcmcp', ]; # Following hash references represent MrBayes command options. In easy # cases, the options are key/value pairs, which are easily stored in hash # refs and stringified once $self->run is called. In some cases, a range # of values must be specified (which are stored in array refs). # Unfortunately, MrBayes is slightly inconsistent in the way these are # stringified. Hence, where appropriate, there are code references attached # to $self->{$command}->{_string}, which stringify the internal data # structures to MrBayes's syntax. Refer to the respective setters to # understand how the options are stored internally. $self->{'set'} = { 'autoclose' => undef, 'nowarn' => undef, 'quitonerror' => undef, }; $self->{'pairs'} = { '_string' => sub { return join(', ',@{$_[2]}) if @{$_[2]} }, }; $self->{'usertree'} = undef, $self->{'outgroup'} = { '_string' => sub { return $_[2] }, }; $self->{'constraint'} = { '_string' => sub { return $_[1].' '.$_[2]->{'p'}.' = '. join(' ', @{$_[2]->{'taxa'}}) }, }; $self->{'ctype'} = { '_string' => sub { return $_[1].': '. join(' ',@{$_[2]}) }, }; $self->{'link'} = { '_string' => sub { return $_[1].'=('. join(',',@{$_[2]}) .')' }, }; $self->{'partition'} = { '_string' => sub { return $_[1] . '=' . scalar @{ $_[2] } . ':' . join(' ', @{ $_ }) . ',' foreach ( @{ $_[2] } ) }, }; $self->{'unlink'} = { '_string' => sub { return $_[1] . '=(' . join(',', @{ $_[2] }) . ')' }, }; $self->{'mcmcp'} = { 'reweight' => { '_string' => sub { return $_[1] . '(' . join( ',', @{ $_[2] } ) . ')' if @{ $_[2] } }, }, 'seed' => undef, 'swapseed' => undef, 'ngen' => undef, 'nruns' => undef, 'nchains' => undef, 'temp' => undef, 'swapfreq' => undef, 'nswaps' => undef, 'samplefreq' => undef, 'printfreq' => undef, 'printall' => undef, 'printmax' => undef, 'mcmcdiagn' => undef, 'diagnfreq' => undef, 'minpartfreq' => undef, 'allchains' => undef, 'allcomps' => undef, 'relburnin' => undef, 'burnin' => undef, 'burninfrac' => undef, 'stoprule' => undef, 'stopval' => undef, 'filename' => undef, 'startingtree' => undef, 'nperts' => undef, 'savebrlens' => undef, 'ordertaxa' => undef, }; $self->{'lset'} = { 'nucmodel' => undef, 'nst' => undef, 'code' => undef, 'ploidy' => undef, 'rates' => undef, 'ngammacat' => undef, 'nbetacat' => undef, 'omegavar' => undef, 'covarion' => undef, 'coding' => undef, 'parsmodel' => undef, }; $self->{'prset'} = { 'm3omegapr' => undef, 'statefreqpr' => undef, 'ratepr' => undef, 'topologypr' => undef, 'sampleprob' => undef, 'tratiopr' => { '_string' => $closure, }, 'ny98omega1pr' => { '_string' => $closure, }, 'revmatpr' => { '_string' => $closure, }, 'aarevmatpr' => { '_string' => $closure, }, 'omegapr' => { '_string' => $closure, }, 'codoncatfreqs' => { '_string' => $closure, }, 'shapepr' => { '_string' => $closure, }, 'ratecorrpr' => { '_string' => $closure, }, 'pinvarpr' => { '_string' => $closure, }, 'covswitchpr' => { '_string' => $closure, }, 'speciationpr' => { '_string' => $closure, }, 'extinctionpr' => { '_string' => $closure, }, 'thetapr' => { '_string' => $closure, }, 'ny98omega3pr' => { '_string' => $closure, }, 'treeheightpr' => { '_string' => $closure, }, 'aamodelpr' => { '_string' => $closure, }, 'symdirihyperpr' => { '_string' => $closure, }, 'brlenspr' => { '_string' => $closure, }, }; # additional arguments passed to constructor. Not sure which other ones # could come in here, but we only care about the program => 'mb' argument # anyway. my ( $attr, $value ); while ( @args ) { $attr = shift @args; $value = shift @args; next if $attr =~ m/^-/; # For some reason, we don't want named parameters if ( $attr =~ m/^EXE$/i ) { $self->program_name( $value ); } if ( $attr =~ m/^PROGRAM$/i ) { $self->executable( Bio::Root::IO->catfile( $value, $self->program_name ) ); next; } } return $self; } =head2 program_name Title : program_name Usage : >program_name() Function: holds the program name Returns: string Args : None =cut # hardcoded program name. Unfortunately, there is some variation in # executable name: on linux and osx it compiles into 'mb'. On windows, # the pre-compiled binary is called something like 'MrBayes3_0b4.exe'. sub program_name { my $self = shift; $self->{'_mbexe'} = shift if @_; return $self->{'_mbexe'}; } =head2 program_dir Title : program_dir Usage : ->program_dir() Function: returns the program directory, obtained from ENV variable. Returns: string Args : =cut # program dir, dependent on MBDIR environment variable $ENV{'MBDIR'}; sub program_dir { return Bio::Root::IO->catfile( $ENV{'MRBAYESDIR'} ) if $ENV{'MRBAYESDIR'}; } =head2 run, mcmc Title : run Usage : $mrbayes->run Function: writes input file, launches MrBayes. Returns: Outfile base name, read handle, write handle, PID; Args : =cut *mcmc = \&run; # writes input file, launches MrBayes. sub run { my ($self) = @_; my $file = $self->_nexfile; my $exe = $self->executable; my ( $readfh, $writefh, $pid ); eval { $pid = open2( $readfh, $writefh, $exe, $file ); }; if ( $@ && "$@" =~ m/^open2:/ ) { $self->throw( $@ ); } elsif ( $pid ) { $self->{'_pid'} = $pid; $self->{'_readfh'} = $readfh; $self->{'_writefh'} = $writefh; $self->{'_outfile'} = $file; return $file, $readfh, $writefh, $pid; } else { $self->throw( "MrBayes execution failed for unknown reasons" ); } } =head2 commands Title : commands Usage : $mrbayes->commands(); Function: Gets the available commands for the Bio::Tools::Run::MrBayes object. Returns : Array (ref) of strings. Args : None. =cut # array (ref) of available commands sub commands { my $self = shift; return wantarray ? @{ $self->{'commands'} } : $self->{'commands'}; } =head2 usertree Title : usertree Usage : $mrbayes->usertree($tree); Function: Gets/Sets the input usertree for MrBayes. Returns : A tree description in Newick format. Args : [Optional] A tree description in Newick format. =cut # $self->usertree( '((A,B),C);' ); sub usertree { my $self = shift; $self->{'usertree'} = shift if @_; return $self->{'usertree'}; } =head2 outgroup Title : outgroup Usage : $mrbayes->outgroup($outgroup); Function: Gets/Sets the outgroup. Returns : A taxon name (string). Args : [Optional] A taxon name (string). =cut # $self->outgroup( 'taxon_1' ); sub outgroup { my $self = shift; $self->{'outgroup'}->{'name'} = shift if @_; return $self->{'outgroup'}; } =head2 pairs Title : pairs Usage : $mrbayes->pairs(@pairs); Function: Gets/Sets the site pairs (for stems). Returns : An array of pairs. Args : [Optional] An array of pairs, formatted as: @pairs = ( '1:2', '3:4', '5:6' ); =cut # $self->pairs( '4:34', '3:456', '6:32' ); sub pairs { my $self = shift; if ( @_ ) { my $pairs = []; foreach ( @_ ) { if ( /^\d+:\d+$/ ) { push @{ $pairs }, $_; } else { $self->throw( "\"$_\" is not a valid pair definition (e.g. 33:56)" ); } } $self->{'pairs'}->{'pairs'} = $pairs; } return $self->{'pairs'}->{'pairs'}; } =head2 constraint Title : constraint Usage : $mrbayes->constraint(%constraint); Function: Gets/Sets a topological constraint Returns : A hash ref containing all constraints currently in memory. Args : [Optional] A hash containing a constraint description: %constraint = ( 'name' => $name, # name for the constraint 'p' => 100, # prior for the constraint 'taxa' => [ 1, 2, 3 ], # array ref of taxa in constraint, can be ); # numbers or names. =cut # $self->constraint( 'name' => 'myconstraint', 'p' => 100, 'taxa' => [1, 2, 3] ); sub constraint { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } my $name; while ( my ( $key, $value ) = each %opt ) { if ( $key =~ m/name/i ) { $name = lc $key; $self->{'constraint'}->{$name} = {}; } } while ( my ( $key, $value ) = each %opt ) { $self->{'constraint'}->{$name} = { lc $key => $value }; } } return $self->{'constraint'}; } =head2 ctype Title : ctype Usage : $mrbayes->ctype(%ctype); Function: Gets/Sets the categorical character type for a set of characters Returns : A hash ref containing all ctypes currently in memory. Args : [Optional] A hash containing a ctype description: %ctype = ( 'ordering' => 'unordered', # For example. 'chars' => [ 1, 2, 3 ], # Array ref of characters to which ); # ordering applies Notes : If an ordering already exists, the chars are added to it. =cut # Gets/Sets options for the ctype command sub ctype { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } my ( $ordering, $chars ); while ( my ( $key, $value ) = each %opt ) { if ( $key =~ m/ordering/i ){ $ordering = lc $value; } elsif ( $key =~ m/chars/i ) { $chars = $value; } } if ( $self->{'ctype'}->{$ordering} ) { push @{ $self->{'ctype'}->{$ordering} }, $chars; } else { $self->{'ctype'}->{$ordering} = $chars; } } return $self->{'ctype'}; } =head2 link Title : link Usage : $mrbayes->link(%link); Function: Gets/Sets links of model parameters across partitions Returns : A hash ref containing all links currently in memory. Args : [Optional] A hash containing link descriptions: %link = ( 'Tratio' => [ 'all' ], # For example. 'Shape' => [ 1, 2, 3 ], ); =cut # Gets/Sets options for the link command. sub link { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } while ( my ( $key, $value ) = each %opt ) { $self->{'link'}->{$key} = $value; } } return $self->{'link'}; } =head2 partition Title : partition Usage : $mrbayes->partition(%partition); Function: Gets/Sets character partitions. Returns : A hash ref containing all partitions currently in memory. Args : [Optional] A hash containing partition descriptions: %partition = ( 'by_codon' => [ [ 1, 4, 7 ], [ 2, 5, 8 ], [ 3, 6, 9 ] ], ); =cut # Gets/Sets options for the partition command. sub partition { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } while ( my ( $key, $value ) = each %opt ) { $self->{'partition'}->{$key} = $value; } } return $self->{'partition'}; } =head2 unlink Title : unlink Usage : $mrbayes->unlink(%unlink); Function: unlinks model parameters across partitions Returns : A hash ref containing all links currently in memory. Args : [Optional] A hash containing link descriptions: unlink = ( 'Tratio' => [ 'all' ], # For example. 'Shape' => [ 1, 2, 3 ], ); =cut # Gets/Sets options for the unlink command. sub unlink { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } while ( my ( $key, $value ) = each %opt ) { $self->{'unlink'}->{$key} = $value; } } return $self->{'unlink'}; } =head2 set Title : set Usage : $mrbayes->set(%set); Function: Gets/Sets global options for MrBayes behaviour Returns : A hash ref containing all currently set options. Args : [Optional] A hash containing link descriptions: %set = ( 'autoclose' => 'yes', ); =cut # Gets/sets options for the 'set' command. sub set { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } while ( my ( $key, $value ) = each %opt ) { $self->{'set'}->{$key} = $value; } } return $self->{'set'}; } =head2 matrix Title : matrix Usage : $mrbayes->matrix($matrix); Function: Gets/Sets character state matrix Returns : A matrix object Args : [Optional] A matrix object =cut # Sets the character state matrix. At present, we'll just assume this is # a Bio::Phylo::Matrices::Matrix object. sub matrix { my $self = shift; $self->{'matrix'} = shift if @_; return $self->{'matrix'}; } =head2 prset Title : prset Usage : $mrbayes->prset( %prset ); Function: Gets/Sets priors Returns : A hash of prset options Args : [Optional] A hash of prset options =cut # Gets/sets the options for the prset command. sub prset { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } while ( my ( $key, $value ) = each %opt ) { if ( exists $self->{'prset'}->{lc($key)} ) { $self->{'prset'}->{lc($key)} = $value; } else { $self->throw( "Parameter \"$key\" does not exist!" ); } } } my $closure = $self->{'prset'}->{'tratiopr'}->{'_string'}; my $value = $self->{'prset'}->{'tratiopr'}->{'beta'}; return ->( 'beta', $value ); } =head2 lset Title : prset Usage : $mrbayes->lset( %lset ); Function: Gets/Sets likelihood settings Returns : A hash of lset options Args : [Optional] A hash of lset options =cut # Gets/Sets the options for the lset command. sub lset { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } while ( my ( $key, $value ) = each %opt ) { if ( exists $self->{'lset'}->{lc($key)} ) { $self->{'lset'}->{lc($key)} = $value; } else { $self->throw( "Parameter \"$key\" does not exist!" ); } } } return $self->{'lset'}; } =head2 mcmcp Title : mcmcp Usage : $mrbayes->mcmcp( %lset ); Function: Gets/Sets mcmcp settings Returns : A hash of mcmcp options Args : [Optional] A hash of mcmcp options =cut # Gets/sets the options for the mcmcp command sub mcmcp { my $self = shift; if ( @_ ) { my %opt; eval { %opt = @_ }; if ( $@ ) { $self->throw( $@ ); } while ( my ( $key, $value ) = each %opt ) { if ( exists $self->{'mcmcp'}->{lc($key)} ) { $self->{'mcmcp'}->{lc($key)} = $value; } else { $self->throw( "Parameter \"$key\" does not exist!" ); } } } return $self->{'mcmcp'}; } =head2 raw Title : raw Usage : $mrbayes->raw( %lset ); Function: Gets/Sets raw commands Returns : A raw string of mrbayes commands Args : [Optional] A raw string of mrbayes commands =cut # This is for a raw command string (i.e. without perlish internal # representation). Probably not needed, ever (famous last words). sub raw { my $self = shift; $self->{'raw'} = shift if @_; return $self->{'raw'}; } =head2 reset Title : reset Usage : $mrbayes->reset(); Function: Resets mrbayes object to default settings Returns : A default mrbayes object Args : None =cut # At present this just instantiates a new object, but it'd be nice to be able # to resent only a given command, keeping all the settings applied elsewhere. sub reset { my $self = shift; $self = __PACKAGE__->new; return $self; } =head2 stringify Title : reset Usage : $mrbayes->stringify( $command ); Function: Turns a perlish representation of commands into a string Returns : A raw string of mrbayes commands Args : A $command =cut # Turns argument command into a string representation that MrBayes understands. sub stringify { my ( $self, $command ) = ( $_[0], lc $_[1] ); if ( exists $self->{$command} ) { return $self->_stringify( $command ); } else { $self->throw( "Command \"$command\" does not exist!" ); } } # Iterates over the options of the argument command for $self->stringify( $command ) # and stringifies these also. sub _stringify { my ( $self, $command ) = @_; my $string; if ( ref $self->{$command} eq 'HASH' and %{ $self->{$command} } ) { # calls code reference to stringify main option if ( exists $self->{$command}->{'_string'} ) { while ( my ( $key, $val ) = each %{ $self->{$command} } ) { next if $key eq '_string' or not defined $val; $string .= ' ' . $command . ' ' . $self->{$command}->{'_string'}->( $command, $key, $val ) . ";\n"; } } # calls code reference to stringify sub-options else { foreach my $option ( keys %{ $self->{$command} } ) { my $opt = $self->{$command}->{$option}; if ( ref $opt and ref $opt eq 'HASH' and exists $opt->{'_string'} ) { while ( my ( $key, $val ) = each %{ $self->{$command}->{$option} } ) { next if $key eq '_string' or not defined $val; $string .= ' ' . $command . ' ' . $self->{$command}->{$option}->{'_string'}->( $option, $key, $val ) . ";\n"; } } else { $string .= ' ' . $command . ' ' . $option . '=' . $opt . ";\n" if $opt; } } } } elsif ( $self->{$command} and not ref $self->{$command} ) { $string .= ' ' . $command . '=' . $self->{$command} . ";\n"; } return $string; } # writes nexus file with alignment in *data* block (which is informally # deprecated in favour of the *characters* block, but MrBayes seems to # want it this way). sub _nexfile { my $self = shift; { my $NEXFH; open ( $NEXFH, '>', $self->{'mcmcp'}->{'filename'} ) or die $!; print $NEXFH "#NEXUS\n"; print $NEXFH $self->matrix->to_nexus; # this is a Bio::Phylo::Matrices::Matrix method print $NEXFH "BEGIN MRBAYES;\n"; print $NEXFH "[! MrBayes block written by " . ref $self; print $NEXFH " " . $self->VERSION . " on " . localtime() . " ]\n"; print $NEXFH " log start filename=deleteme.log replace;\n"; foreach my $command ( @{ $self->{'commands'} } ) { my $setting = $self->stringify( $command ); print $NEXFH $setting if $setting; } print $NEXFH " mcmc;\n"; print $NEXFH " " . $self->raw . ";\n" if $self->raw; print $NEXFH "END;\n"; close ( $NEXFH ) or die $!; } return $self->{'mcmcp'}->{'filename'}; } # object destructor. sub DESTROY { my $self = shift; if ( ! $self->save_tempfiles ) { $self->cleanup(); } $self->SUPER::DESTROY(); } =head2 VERSION Title : reset Usage : $mrbayes->VERSION() or Bio::Tools::Run::MrBayes::VERSION; Function: Returns software version Returns : Returns software version Args : None =cut sub VERSION { $VERSION } 1;