import CORBA from PIPRes.service_impl.split_freq_supplier import SplitFreqSupplier from PIPRes.cipres_types import CipresTree, scoreDifference from PIPRes.service_impl.paup_wrap import ConstraintType #@ temporary location for this enum from PIPRes.tree import numberedLeafTree from PIPRes.util.io import cipresGetLogger _LOG = cipresGetLogger('service_impl.param_boot') class _ParamBootFreqSupplier(SplitFreqSupplier): def __init__(self, simulator, treeInferer, realMatrixTreeInferer, splits): '''Presumably treeInferer and realMatrixTreeInferer are the same (but not required). treeInferer must also fulfil the TreeEvaluate interface''' self.simulator = simulator self.treeInferer = treeInferer self.splits = splits self.splitsTable = {} self.realMatrix = None self.realMatrixTreeInferer = realMatrixTreeInferer def getSplitsTable(self): if not self.splitsTable: self._paramBoot() return self.splitsTable def setMatrix(self, matrix): self.realMatrix = matrix self.splitsTable = {} def _paramBoot(self): raise NotImplementedError, '_ParamBootFreqSupplier is an abstract base.''' class ParamBootFreqSupplier(_ParamBootFreqSupplier): ''' Performs parametric bootstrapping to evaluate the support for splits. Model tree is the best tree (as judged by the real data) that does NOT have the split. Test statistic is the difference between the best tree and the best tree without the split. Null distribution simulated by recording the difference in score between the true (model) tree and the preferred tree for each replicate. ''' def _paramBoot(self): assert(self.realMatrix) # The first step is to infer the model tree for the simulations self.realMatrixTreeInferer.setMatrix(self.realMatrix) splits = self.splits # we do an unconstrained search self.realMatrixTreeInferer.setConstraint(None, ConstraintType.DONT_ENFORCE) optimalTree = CipresTree(self.realMatrixTreeInferer.inferTree(CORBA.Object._nil)) if not splits: splits = copy.deepcopy(optimalTree.getSplits()) criterion = self.realMatrixTreeInferer.getCriterion() self.splitsTable = {} eventConsumer = CORBA.Object._nil for spl in splits: self.realMatrixTreeInferer.setConstraint(spl, ConstraintType.NOT_MONOPHYLETIC) modelTree = CipresTree(self.realMatrixTreeInferer.inferTree(CORBA.Object._nil)) scoreDiff = scoreDifference(criterion, optimalTree.m_score, modelTree.m_score) if float(scoreDiff) < 0.00001: # @arbitrary cutoff for unsupported split !!!! self.splitsTable[spl] = 0.0 else: self.simulator.setTree(modelTree) # we assure that the constraints are taken off in case self.realMatrixTreeInferer is self.treeInferer self.treeInferer.setConstraint(None, ConstraintType.DONT_ENFORCE) n = 0 repsMoreExtreme = 0 for n, matrix in enumerate(iter(self.simulator)): try: _LOG.info(str(matrix)) _LOG.debug('\n#PARAM BOOT REALIZATION %d #\n'% n) self.treeInferer.setMatrix(matrix) tree = CipresTree(self.treeInferer.inferTree(eventConsumer)) #if tree.hasSplit(spl): # self.treeInferer.setConstraint(spl, ConstraintType.NOT_MONOPHYLETIC) # lackSplitTree = CipresTree(self.realMatrixTreeInferer.inferTree(CORBA.Object._nil)) # self.treeInferer.setConstraint(None, ConstraintType.DONT_ENFORCE) self.treeInferer.setTree(modelTree) scoredModelTree = self.treeInferer.evaluateTree(eventConsumer) simRepScoreDiff = scoreDifference(criterion, tree.m_score, scoredModelTree.m_score) _LOG.debug('simRepScoreDiff=%f scoreDiff=%' % (simRepScoreDiff, scoreDiff)) if simRepScoreDiff >= scoreDiff: repsMoreExtreme += 1 except: self.splitsTable = {} raise nRealizations = float(n + 1) self.splitsTable[spl] = (nRealizations - float(repsMoreExtreme))/nRealizations #print 'SCOREDIFFS', str(self.splitsTable) #posDK = [i for i in self.splitsTable.itervalues() if i >= 0.000001] #if len(posDK) >1: # sys.exit('found multiple splits with positive DK indices') #import os #os.system('/bin/sh /Users/mholder/bin/cleanCIPRESTemps.sh') return self.splitsTable class CIParamBootFreqSupplier(_ParamBootFreqSupplier): ''' Performs parametric bootstrapping to produce a confidence set of trees. Support is then summarized by the number of times a clade is seen in this set of trees''' def _paramBoot(self): assert(self.realMatrix) # The first step is to infer the model tree for the simulations self.realMatrixTreeInferer.setMatrix(self.realMatrix) splits = self.splits # we do an unconstrained search self.realMatrixTreeInferer.setConstraint(None, ConstraintType.DONT_ENFORCE) eventConsumer = CORBA.Object._nil modelTree = CipresTree(self.realMatrixTreeInferer.inferTree(eventConsumer)) mtSplits = modelTree.getSplits() if not splits: splits = copy.deepcopy(mtSplits) self.splitsTable = {} st = {} for spl in splits: st[spl] = 0.0 for spl in mtSplits: st[spl] = 0.0 sixTree = numberedLeafTree('(1,(2,3),4)') tenTree = numberedLeafTree('(1,(2,4),3)') twelveTree = numberedLeafTree('((1,2),3,4)') self.simulator.setTree(modelTree) for n, matrix in enumerate(iter(self.simulator)): _LOG.info(str(matrix)) _LOG.debug('\n#PARAM BOOT REALIZATION %d #\n'% n) self.treeInferer.setMatrix(matrix) tree = CipresTree(self.treeInferer.inferTree(eventConsumer)) _LOG.warn('Using 4 taxon hack, because has split does not seem to be working') spl = None if tree == sixTree: spl = 6L elif tree == tenTree: spl = 10L elif tree == twelveTree: spl = 12L if spl is not None: st[spl] = st[spl] + 1.0 nRealizations = float(n + 1) for spl in splits: self.splitsTable[spl] = st[spl]/nRealizations return self.splitsTable