#!/usr/bin/python
'''Defines RecIDCM a configurable class for recursive, iterative Divide-and-Conquer Tree Searching of Roshan, et al. CSB 2004.'''
import sys, re, copy
from PIPRes.cipres_types import *
from PIPRes.util.server_import import *
from PIPRes.util.client_import import *
from PIPRes.wrap.idl import hasEdgeLengths
def getPrunedCopy(inTree, leafSet):
t = CipresTree(inTree)
t.pruneTo(leafSet, True)
t.refreshLeafSet()
return t
def recidcmImproverIndexFuncFullVsSubTree(subtree, **kwargs):
'''Returns -1 (for the last tree improver) if subtree has all of the leaves (it is the full tree), otherwise returns 0.
Can be used as an "improverIndexFunc" to RecIDCM to yield a method that uses one tree improver (the first) during the recursion, and another (the last) for improving the full tree.'''
startingNLeaves = kwargs['driver'].startingNLeaves
if subtree.getNLeaves() == startingNLeaves:
return -1
return 0
def recidcmImproverIndexFuncSubtreeOnly(subtree, **kwargs):
'''Returns None if subtree has all of the leaves, otherwise returns 0. This suppresses searching on the full tree.
Can be used as an "improverIndexFunc" to RecIDCM to yield a method that uses one tree improver (the first) during the recursion, and and does not try to improve the final, merged and refined tree'''
startingNLeaves = kwargs['driver'].startingNLeaves
if subtree.getNLeaves() == startingNLeaves:
return None
return 0
class RecIDCM(SimpleServer):
'''A tree improver that uses CIPRES services to implement recursive, iterative DCM3.
Keyword arguments:
registry: reference to a CIPRES registry interface (default = getCipresRegistry()).
storeIntermediates: Boolean determines whether intermediate (inferred subtrees, unrefined trees, etc) will be stored (default = False)
Controlling iteration
maxIter: integer, the number of iterations of recursive DCM. 0 means that improveTree will not do any searching. (default = 1)
continueIteratingFunc: reference to a callable python object which will be called if the iteration number is < maxIter to determine if iteration
should continue. the call will be continueIteratingFunc(tree, iterationNumber, driver = self) where "self" is the RecIDCM object.
Controlling recursion
maxRecursion: maximum recursion depth. If depth > maxRecursion, then a treeImprover will be called regardless of the tree size. (default = 1)
maxNLeaves: a recursion stopping rule. If a subtree.m_nLeaves <= maxNLeaves, then recursion will stop and the tree will be improved (default and minimum = 4)
improverIndexFunc: reference to a callable python object which will be called to supply the index of the tree improver to use (see section on servants below).
During recursion this object will get calls with the signature treeImproverIndexFunc(subTree, driver = self, attName = 'treeImprovers', recursionDepth = self.recDepth)
Returning None indicates that no tree improver is appropriate (recursion should continue), otherwise the returned value should be the index in the list of tree improvers
If the function returns None (or no function is supplied) and recursion is stopped by maxRecursion or maxNLeaves, then treeImprover[defaultSubtreeImproverIndex] will be used for subtrees
and the last element will be used for improving the full tree(default = None).
Function must return None for the full tree, if you wish to suppress improving the full tree.
see validIndexStopsRecursion.
defaultSubtreeImproverIndex: index of subtree improver if improverIndexFunc does not supply one and recursion is stopped.
validIndexStopsRecursion: Boolean. If improverIndexFunc returns an integer (instead of None), but maxRecursion or maxNLeaves do not stop recursion, the validIndexStopsRecursion
setting will determine if recursion is stopped.
Servants, if any of the following servants is unspecified, then the registry will be used to obtain a (default) implementation when needed.
treeDecomposer: reference to object that implements leafSetDecompose.
treeMerger: reference to super-tree implementation: mergeTrees method.
treeRefiner: reference to an object that refines trees with polytomies to bifurcating trees (refineTree method)
treeImprovers: a list of treeImprover object references (see improverIndexFunc).
numTreeImprovers: length of treeImprovers list (list will be cropped or padded with None if treeImprovers and numTreeImprovers do not agree)
servantConfiguration: string to string dictionary mapping a servant specifier ('treeDecomposer', 'treeMerger', 'treeRefiner', 'treeImprovers') to the text command to send them
via the execute() method. Keys may be omitted if no configuration is needed. The 'treeImprovers' key should map to a list of commands. (default is {})
'''
def __init__(self, **kwargs):
'''See RecIDCM class help for discussion of the keyword arguments.'''
self.registryWrapper = kwargs.get('registry')
SimpleServer.__init__(self, self.registryWrapper)
self.treeImprovers = kwargs.get('treeImprovers')
nTreeImprovers = kwargs.get('numTreeImprovers')
nTreeImproversGiven = self.treeImprovers is not None and len(self.treeImprovers) or 0
if nTreeImproversGiven == 0:
if nTreeImprovers is None or nTreeImprovers < 1:
self.treeImprovers = [None, None]
if kwargs.get('servantConfiguration') is None:
kwargs['servantConfiguration'] = { 'treeImprovers': ['Default HS swap=TBR nomultrees;', 'Default HS swap=NNI multrees=no;']}
else:
self.treeImprovers = [None] * nTreeImprovers
elif nTreeImprovers is not None and nTreeImprovers > 0:
if nTreeImprovers > nTreeImproversGiven:
self.treeImprovers.extend([None] * (nTreeImprovers - nTreeImproversGiven))
elif nTreeImprovers < nTreeImproversGiven:
del self.treeImprovers[nTreeImprovers:]
self.treeImproversRegEntry = kwargs.get('treeImproverRegEntries', [])
def_reg_entry = composeRegistryQuery(interface='TreeImprove', applicationName='org.cipres.paup-wrap')
while len(self.treeImproversRegEntry) < len(self.treeImprovers):
self.treeImproversRegEntry.append(def_reg_entry)
for n in range(len(self.treeImprovers)):
if (self.treeImprovers[n] is None) and (self.treeImproversRegEntry[n] is None):
self.treeImproversRegEntry[n] = def_reg_entry
self.treeDecomposeObjRef = kwargs.get('treeDecomposer')
self.treeMergeObjRef = kwargs.get('treeMerger')
self.treeRefineObjRef = kwargs.get('treeRefiner')
self.maxIter = int(kwargs.get('maxIter', 1)) # !
self.maxRecursion = int(kwargs.get('maxRecursion', 1))
self.maxNLeaves = int(kwargs.get('maxNLeaves', 4))
if self.maxNLeaves < 4:
self.maxNLeaves = 4
self.treeImproverIndexFunc = kwargs.get('improverIndexFunc')
self.validIndexStopsRecursion = kwargs.get('validIndexStopsRecursion', False)
self.continueIterating = kwargs.get('continueIteratingFunc')
self.defaultSubTreeImproverIndex = int(kwargs.get('defaultSubtreeImproverIndex', 0))
self.storeIntermediates = bool(kwargs.get('storeIntermediates', False))
self.intermediates = []
self.servantConfiguration = kwargs.get('servantConfiguration', {})
self.loggingIndentString = '%80s' % ' '
def setServantConfiguration(self, cmdDict):
self.servantConfiguration = cmdDict
def _useServantToImproveTree(self, servant, subTree):
'''Uses self.treeImproverObjRef to improve a tree. Assumes that self.treeImproverObjRef.setMatrix has been called.'''
servant.setTree(subTree)
return servant.improveTree(CORBA.Object._nil)
def _dcmFixedMatrix(self, subTree):
loggingIndent = self.loggingIndentString[:1 + self.recDepth]
recurseDepthExceeeded = self.recDepth >= self.maxRecursion
subTreeNLeaves = subTree.m_nLeaves
_LOG.debug('subTreeNLeaves=%d' % subTreeNLeaves)
_LOG.debug('%s_dcmFixedMatrix %s\n%ssubTreeNLeaves = %d\n' % (loggingIndent, subTree.m_newick, loggingIndent, subTreeNLeaves))
treeImproverIndex = None
if self.treeImproverIndexFunc is not None:
mnl = subTree.m_nLeaves
subTree = CipresTree(subTree)
subTree.m_nLeaves = mnl
treeImproverIndex = self.treeImproverIndexFunc(subTree, driver = self, attName = 'treeImprovers', recursionDepth = self.recDepth)
if (self.validIndexStopsRecursion and treeImproverIndex is not None) or recurseDepthExceeeded or subTreeNLeaves <= self.maxNLeaves:
if treeImproverIndex is None:
treeImproverIndex = self.defaultSubTreeImproverIndex
_LOG.debug('%sSmall enough. call tree improver %d (out of %d)...\n' % (loggingIndent, treeImproverIndex, len(self.treeImprovers)))
t = self._useServantToImproveTree(self.treeImprovers[treeImproverIndex], subTree)
_LOG.debug('%s returned self.treeImproverObjRef.improveTree = %s\n' % (loggingIndent, t.m_newick))
return t
self.recDepth += 1
_LOG.debug('%sToo big. decompose...\n' % loggingIndent)
import pdb
try:
leafSets = self.treeDecomposeObjRef.leafSetDecompose(subTree)
except CipresIDL_api1.TreeParseException:
sys.exit('Tree Parse Exception on the tree %s' % str(subTree.m_newick))
_LOG.debug('%sleaf sets: \n%s %s\n\n' % (loggingIndent, loggingIndent, ('\n'+loggingIndent + ' ').join([str(i) for i in leafSets])))
smallerTreesList = []
for lsNum, leafSet in enumerate(leafSets):
prunedTree = getPrunedCopy(subTree, leafSet)
prunedTree.m_nLeaves = len(leafSet) #assure _dcmFixedMatrix call has m_nLeaves member
if self.storeIntermediates:
self.intermediates.append(('%d.%d.%d.start' % (self.nIters, self.recDepth, lsNum), prunedTree.m_newick))
improvedPrunedTree = self._dcmFixedMatrix(prunedTree)
smallerTreesList.append(improvedPrunedTree)
if self.storeIntermediates:
self.intermediates.append(('%d.%d.%d' % (self.nIters, self.recDepth, lsNum), improvedPrunedTree.m_newick))
_LOG.debug('%smerging: \n%s %s\n' % (loggingIndent, loggingIndent ,('\n'+loggingIndent + ' ').join([i.m_newick for i in smallerTreesList])))
tree = CipresTree(self.treeMergeObjRef.mergeTrees(smallerTreesList))
_LOG.debug('%smerge returned:%s\n\n' % (loggingIndent, tree.m_newick))
if self.storeIntermediates:
self.intermediates.append(('%d.%d.merged' % (self.nIters, self.recDepth), tree.m_newick))
_LOG.debug('%srefining:%s\n' % (loggingIndent, str(tree.m_newick)))
self.treeRefineObjRef.setTree(tree)
refined = self.treeRefineObjRef.refineTree()
_LOG.debug('%srefine returned:%s\n\n' % (loggingIndent, str(refined.m_newick)))
refined.m_nLeaves = subTreeNLeaves #assure _dcmFixedMatrix call has m_nLeaves member
self.recDepth -= 1
return CipresTree(refined)
def _passMatrixToServants(self, matrix):
for treeImp in self.treeImprovers:
treeImp.setMatrix(matrix)
self.treeRefineObjRef.setMatrix(matrix)
def improveTree(self, tree, matrix):
'''Uses recursive, iterative DCM3 to infer a tree. "tree" will be used as the starting tree, and the "matrix" will be the data used to evaluate trees.
Currently all model-choice, scoring rules, etc must be specified by configuring the treeImprove servants.'''
if not hasEdgeLengths(tree):
raise ValueError, 'Starting tree must have edge lengths (or the decomposition will not work).'
self.startingTree = tree
self.intermediates = []
self._incarnateServants()
if not self._configureServants():
raise AssertionError, 'Servant configuration failed'
self.nIters = 0
self._passMatrixToServants(matrix)
if not hasattr(tree, 'm_nLeaves'):
tree.m_nLeaves = tree.getNLeaves() #assure _dcmFixedMatrix call has m_nLeaves member
self.startingNLeaves = tree.m_nLeaves
if self.storeIntermediates:
self.intermediates.append(('%d.0.initial' % self.nIters, tree.m_newick))
while self.nIters < self.maxIter:
if self.continueIterating is not None:
mnl = tree.m_nLeaves
tree = CipresTree(tree)
tree.m_nLeaves= mnl
if not self.continueIterating(tree, nIterations = self.nIters, driver = self):
break
_LOG.debug('not done, another loop\n')
self.recDepth = 0 #temp
tree = self._dcmFixedMatrix(tree)
if self.storeIntermediates:
self.intermediates.append(('%d.0.refined' % self.nIters, tree.m_newick))
_LOG.debug('recDCM returned %s\n' % tree.m_newick)
#default is to use the last tree improver for the full tree
treeImproverIndex = -1
if self.treeImproverIndexFunc is not None:
tree = CipresTree(tree)
treeImproverIndex = self.treeImproverIndexFunc(tree, driver = self, attName = 'treeImprovers', recursionDepth = self.recDepth)
if treeImproverIndex is not None:
tree = self._useServantToImproveTree(self.treeImprovers[treeImproverIndex], tree)
if self.storeIntermediates:
self.intermediates.append(('%d.0.full' % self.nIters, tree.m_newick))
_LOG.debug('full tree search (using treeImprovers[%d] out of %d) returned %s\n' % (treeImproverIndex, len(self.treeImprovers), tree.m_newick))
tree.m_nLeaves = self.startingNLeaves #assure _dcmFixedMatrix call has m_nLeaves member
self.nIters += 1
return tree
def _passConfigToServant(self, servant, cmd, cmdKey):
_LOG.debug('%s execute "%s"' % (cmdKey, cmd))
result = servant.execute(cmd)
ready = True
if not result[0]:
ready = False
_LOG.error(result[1])
elif len(result[1]) > 0:
_LOG.info('%s configuration output: %s' %(cmdKey, result[1]))
return ready
def _configureServants(self):
assert(self.treeImprovers[0] is not None)
cfgDict = self.servantConfiguration
ready = True
for cmdKey, servant in [ ('treeRefiner', self.treeRefineObjRef),
('treeDecomposer', self.treeDecomposeObjRef),
('treeMerger', self.treeMergeObjRef)]:
cmd = cfgDict.get(cmdKey)
if cmd and (servant is not None):
ready = self._passConfigToServant(servant, cmd, cmdKey) and ready
cmds = cfgDict.get('treeImprovers', [])
for n, cmdAndServantcmdServant in enumerate(zip(cmds, self.treeImprovers)):
cmd, cmdServant = cmdAndServantcmdServant[0], cmdAndServantcmdServant[1]
if cmd and (cmdServant is not None):
ready = self._passConfigToServant(cmdServant, cmd, 'treeImprove[%d]' % n) and ready
return ready
def _incarnateServants(self):
if self.treeRefineObjRef is not None and self.treeDecomposeObjRef is not None and self.treeMergeObjRef is not None:
allInstantiated = True
for ti in self.treeImprovers:
if ti is None:
allInstantiated = False
break
if allInstantiated:
return
if self.registryWrapper is None:
self.registryWrapper = getCipresRegistry()
if self.registryWrapper is None:
raise ValueError, 'Registry interface is invalid. Cannot create servant objects.'
registryWrapper = self.registryWrapper
for n, ti in enumerate(self.treeImprovers):
if ti is None:
self.treeImprovers[n] = registryWrapper.getServiceOrThrow(CipresIDL_api1.TreeImprove, registryEntry=self.treeImproversRegEntry[n])
if not self.treeRefineObjRef:
self.treeRefineObjRef = registryWrapper.getServiceOrThrow(CipresIDL_api1.TreeRefine)
if not self.treeDecomposeObjRef:
self.treeDecomposeObjRef = registryWrapper.getServiceOrThrow(CipresIDL_api1.TreeDecompose)
if not self.treeMergeObjRef:
self.treeMergeObjRef = registryWrapper.getServiceOrThrow(CipresIDL_api1.TreeMerge)
def removeServants(self):
_LOG.debug('RecIDCM removeServants()')
for o in self.treeImprovers + [self.treeRefineObjRef, self.treeDecomposeObjRef, self.treeMergeObjRef]:
try:
if o is not None:
_LOG.debug('calling remove on servant')
o.remove()
except:
pass
self.treeImprovers = [None] * len(self.treeImprovers)
self.treeRefineObjRef = None
self.treeDecomposeObjRef = None
self.treeMergeObjRef = None
def __del__(self):
self.removeServants()
_LOG = cipresGetLogger('pipres.service_impl.client.tree_improve')
def improveTreeRecIDCM(tree, matrix, **kwargs):
'''Uses RecIDCM to try to improve the "tree" argument. The data should be sent in "matrix" (CipresDiscreteMatrix or CipresIDL_api1.DataMatrix)
See help on RecIDCM for a description of the keyword arguments.
Returns CipresIDL_api1.Tree object (or if store intermediates is selected, the tree and list of strings describing the intermediate trees).'''
_LOG.debug('creating RecIDCM object')
dcm = RecIDCM(**kwargs)
try:
_LOG.debug('calling dcm.improveTree')
t = dcm.improveTree(tree, matrix)
finally:
dcm.removeServants()
if kwargs.get('storeIntermediates', False):
return t, dcm.intermediates
return t
RecIDCM.ui_xml = '''
JPanelIntegertrue1The number of iterations of recursive DCM to perform (see also continueIteratingFunc)maxIter = $maxIter$maxItergreater_than0isGreaterThanZeroMaxtrees must be greater than 0Python MethodfalseNoneA function with the signture fxn(subtree, numIters, **kwargs) may be specified. This function should return False to stop iteration.continueIterating = $continueIteratingIntegertrue20Decomposition will continue until the tree has fewer than maxNLeaves (or maxRecursion is hit, see also improverIndexFunc)maxNLeaves = $maxNLeaves$maxNLeavesgreater_than0isGreaterThanZeromaxNLeaves must be greater than 0 (any value under 5 mean that maxNLeaves never stops the recursion).Integertrue5This setting limits the number of recursive decompositions.maxRecursion = $maxRecursion$maxRecursiongreater_than-1isGreaterThanZeroMaxRecursionLevels must be 0 or greater.Integertrue1numTreeImprovers = $numTreeImprovers$MaxRecursionLevelsgreater_than0isGreaterThanZeroAt least one tree improver must be used.ListtruetreeImprovers = [$treeImprovers]List of tree improver objects used during search. Standard order is the small tree improvers first, followed by the improver to use on larger trees.Interface TreeImprovetrue$treeImproverObjlen($treeImprovers)equals0hasRightNumElementsnumTreeImprovers must match the length of treeImproversPython MethodfalseNoneA function with the signture fxn(subtree, **kwargs) may be specified. This function will be called to decide which tree improver to use.treeImproverIndexFunc = $treeImproverIndexFuncBooleantruevalidIndexStopsRecursion=$validIndexStopsRecursionTrueFalseIntegertrue0Index in the treeImprovers list of the treeImprover to use when recursion is halted by maxRecursion or maxNLeaves (not by improverIndexFunc)defaultSubtreeImproverIndex = $defaultSubtreeImproverIndex$maxRecursiongreater_than-1isGreaterThanZeroMaxRecursionLevels must be 0 or greater.Interface TreeDecomposetrueModule used to break the tree into subsets of leaves$treeDecomposerInterface TreeMergetrueSupertree module used after subtrees searches have occurred$treeMergerInterface TreeRefinetrueModule resolve polytomies in the merged trees$treeRefinerBooleanfalsestoreIntermediates=$storeIntermediatesTrueFalse
'''
if __name__ == '__main__':
launchedAsServer = hasServerArgs(sys.argv)
try:
if launchedAsServer:
cipresServe(sys.argv, {'TreeImprove' : RecIDCM}, factoryRegistryKeyword = 'registry')
else:
registryWrapper, mat, startingTree = cipresClientInit(sys.argv, returnMatrix = True, returnTree = True)
dcm = RecIDCM(registry = registryWrapper, maxIter = 2, maxNLeaves = 20, maxRecursion = 3)
try:
_LOG.debug('launchedAsServer = ' + str(launchedAsServer))
_LOG.debug('creating RecIDCM object')
if not mat:
sys.exit('must specify a NEXUS file with a data matrix using -filename arguments')
if not startingTree:
sys.exit('must specify a NEXUS file with a tree using -filename arguments')
_LOG.debug('calling dcm.improveTree')
t = dcm.improveTree(startingTree, mat)
_LOG.debug('Tree returned =%s\n\nRemoving servants.\n' % (t.m_newick))
print 'returned: %s\n score = %s' % (t.m_newick, str(t.m_score))
finally:
dcm.removeServants()
except:
logException(_LOG)