use strict; package Bio::CDAT::QIU::CDAT; # # Enforce the following Bio::CDAT::QIU::CDAT rules: # 1. OTUs exist and their names unique # 2. Trees exist and have the exactly the same set of OTUs # 3. Matrices exist and have the exactly the same set of OTUs # use Bio::Tree::Tree; # from bioperl use Bio::CDAT::QIU::CDAT::Matrix; use Bio::CDAT::Root; use vars '@ISA'; @ISA=qw(Bio::CDAT::Root); sub new { my $class = shift; my $self = { 'name' => undef, 'OTUs' => [], # a list of OTU names (uniqueness checked below) 'trees' => [], # a list of Bio::Tree objects 'matrices' => [], # a list of Bio::CDAT::QIU::CDAT::Matrix objects }; bless( $self, $class ); return $self; } sub set_name { my ( $self, $name ) = @_; $self->{'name'} = $name; } sub set_otus { my ( $self, $ref ) = @_; $self->{'OTUs'} = $ref; } sub set_trees { my ( $self, $ref ) = @_; $self->{'trees'} = $ref; } sub set_matrices { my ( $self, $ref ) = @_; $self->{'matrices'} = $ref; } sub get_otus { my $self = shift; if ( &_check_otu( $self->{'OTUs'} ) ) { return $self->{'OTUs'}; } } # Enforce OTU consistency in trees: sub get_trees { my $self = shift; if ( defined $self->{'trees'} ) { foreach my $tree ( @{ $self->{'trees'} } ) { if ( $self->_check_otus_in_tree($tree) ) { return @{ $self->{'trees'} }; } else { $self->throw( 'Bio::CDAT::Exceptions::OTUMismatch', 'OTU inconsistency in trees.', ); } } } else { $self->throw( 'Bio::CDAT::Exceptions::TreeNotFound', 'No tree found!', ); } } sub select_single_tree { my ( $self, $tree_id ) = @_; if ( defined $self->{'trees'} ) { my $tree = $self->{'trees'}->[ $tree_id - 1 ]; if ( $self->_check_otus_in_tree($tree) ) { return $tree; } else { $self->throw( 'Bio::CDAT::Exceptions::OTUMismatch', 'OTU inconsistency in selected tree.', ); } } else { $self->throw( 'Bio::CDAT::Exceptions::TreeNotFound', 'No tree found!', ); } } # Enforce OTU consistency in matrices: sub get_matrices { my $self = shift; if ( defined $self->{'matrices'} ) { foreach my $matrix ( @{ $self->{'matrices'} } ) { if ( $self->_check_otus_in_matrix($matrix) ) { return @{ $self->{'matrices'} }; } else { $self->throw( 'Bio::CDAT::Exceptions::OTUMismatch', 'OTU inconsistency in matrices.', ); } } } else { $self->throw( 'Bio::CDAT::Exceptions::MatrixNotFound', 'No matrix found!', ); } } sub select_single_matrix { my ( $self, $matrix_id ) = @_; if ( defined $self->{'matrices'} ) { my $matrix = $self->{'matrices'}->[ $matrix_id - 1 ]; if ( $self->_check_otus_in_matrix($matrix) ) { return $matrix; } else { $self->throw( 'Bio::CDAT::Exceptions::OTUMismatch', 'OTU inconsistency in selected matrix.', ); } } else { $self->throw( 'Bio::CDAT::Exceptions::MatrixNotFound', 'No matrix found!', ); } } # Enforce OTU existence & uniqueness: sub _check_otu { my $otus = shift; my %count; if ( defined $otus ) { $count{$_}++ for (@$otus); foreach my $name ( keys %count ) { # every otu can only appear once. if ( $count{$name} > 1 ) { __PACKAGE__->throw( 'Bio::CDAT::Exceptions::OTUMismatch', 'Redundant OTUs: $name.', ); } } return 1; } else { __PACKAGE__->throw( 'Bio::CDAT::Exceptions::OTUNotFound', 'No OTUs found!', ); } } sub _check_otus_in_tree { my $self = shift; my $tree = shift; my @nodes = $tree->get_nodes(); my %count; foreach my $tree_otu ( grep { $_->is_Leaf() } @nodes ) { my $id = $tree_otu->id(); $count{$id}++; } # OTU uniqueness checked foreach my $cdat_otu ( @{ $self->get_otus() } ) { $count{$cdat_otu}++; } foreach my $otu ( keys %count ) { # every otu has to count to 2. if ( $count{$otu} != 2 ) { $self->throw( 'Bio::CDAT::Exceptions::OTUMismatch', 'Inconsistent OTU in tree and Bio::CDAT::QIU::CDAT: $otu.', ); } } return 1; } sub _check_otus_in_matrix { my $self = shift; my $matrix = shift; my %count; foreach my $mat_otu ( @{ $matrix->get_otus() } ) { $count{$mat_otu}++; } # OTU uniqueness checked foreach my $cdat_otu ( @{ $self->get_otus() } ) { $count{$cdat_otu}++; } # every otu has to count to 2. foreach my $otu ( keys %count ) { if ( $count{$otu} != 2 ) { $self->throw( 'Bio::CDAT::Exceptions::OTUMismatch', 'Inconsistent OTU in matrix and Bio::CDAT::QIU::CDAT: $otu.', ); } } return 1; } # # Not implemented Bio::CDAT::QIU::CDAT methods # sub consistency_index { my ( $self, $char_id ) = @_; my $index; return $index; } sub likelihood { my ( $self, $char_id, $model ) = @_; my $like; return $like; } 1; =head1 NAME Bio::CDAT::QIU::CDAT - A Bharacter-Bata-Bnd-Bree Class =head1 SYNOPSIS use Bio::CdatIO; my $in=Bio::CdatIO->new(-file=>shift @ARGV, -format=>'nexus'); my $cdat=$in->next_cdat(); my $forest=$cdat->get_trees(); =head1 DESCRIPTION Organisms are related by a common ancestry, often repesented by a tree-like phylogeny. As a result, the state of a character (e.g., sequence, morphology, behavior) of an orgnism is constrained by its evolutionary legacy. The joint process of inheritance and modification can be modeled by a Markov chain process with probabilities of transitions between different character states specified. Combined analysis of characters and phylogeny allows inference of character states of ancestors. Results from ancestral reconstruction are often used to test mechanisms of character evolution, including recombination, horizontal gene transfer, adaptive sequence changes, and correlated character evolution. A Bio::CDAT::QIU::CDAT object is defined as a set of uniquely named OTUs that are related to each other by one or more phylogenies, and have properties represented by one or more character matrices. The Bio::CDAT::QIU::CDAT class is designed to facilitate the co-analysis of a character matrix (e.g., a multiple sequence alignment) and a tree. We tried not to replicate the existing tree or alignment classes (e.g., Bio::Tree and Bio::SimpleAlign in BioPerl). It is more strictly typed than Bio::Phylo and Bio::Nexus, two similar phylogenetic packages. For example, the Bio::CDAT::QIU::CDAT class offers native Bio::CDAT::QIU::CDAT methods including parsimony reconstruction of ancestral states, customized visual display of character-data-and-tree data, and calculation of consistency indices or likelihood scores. The Bio::CDAT::QIU::CDAT::Factory interface allows instantiation of Bio::CDAT::QIU::CDAT objects from dedicated phylogenetics packages (e.g., MrBayes, PHYLIP, PAUP*, PAML). =head1 SEE ALSO L, L, L, L, L =cut