#!/usr/bin/python '''Defines RecIDCM a configurable class for recursive, iterative Divide-and-Conquer Tree Searching of Roshan, et al. CSB 2004.''' import sys, re, copy from PIPRes.cipres_types import * from PIPRes.util.server_import import * from PIPRes.util.client_import import * from PIPRes.wrap.idl import hasEdgeLengths def getPrunedCopy(inTree, leafSet): t = CipresTree(inTree) t.pruneTo(leafSet, True) t.refreshLeafSet() return t def recidcmImproverIndexFuncFullVsSubTree(subtree, **kwargs): '''Returns -1 (for the last tree improver) if subtree has all of the leaves (it is the full tree), otherwise returns 0. Can be used as an "improverIndexFunc" to RecIDCM to yield a method that uses one tree improver (the first) during the recursion, and another (the last) for improving the full tree.''' startingNLeaves = kwargs['driver'].startingNLeaves if subtree.getNLeaves() == startingNLeaves: return -1 return 0 def recidcmImproverIndexFuncSubtreeOnly(subtree, **kwargs): '''Returns None if subtree has all of the leaves, otherwise returns 0. This suppresses searching on the full tree. Can be used as an "improverIndexFunc" to RecIDCM to yield a method that uses one tree improver (the first) during the recursion, and and does not try to improve the final, merged and refined tree''' startingNLeaves = kwargs['driver'].startingNLeaves if subtree.getNLeaves() == startingNLeaves: return None return 0 class RecIDCM(SimpleServer): '''A tree improver that uses CIPRES services to implement recursive, iterative DCM3. Keyword arguments: registry: reference to a CIPRES registry interface (default = getCipresRegistry()). storeIntermediates: Boolean determines whether intermediate (inferred subtrees, unrefined trees, etc) will be stored (default = False) Controlling iteration maxIter: integer, the number of iterations of recursive DCM. 0 means that improveTree will not do any searching. (default = 1) continueIteratingFunc: reference to a callable python object which will be called if the iteration number is < maxIter to determine if iteration should continue. the call will be continueIteratingFunc(tree, iterationNumber, driver = self) where "self" is the RecIDCM object. Controlling recursion maxRecursion: maximum recursion depth. If depth > maxRecursion, then a treeImprover will be called regardless of the tree size. (default = 1) maxNLeaves: a recursion stopping rule. If a subtree.m_nLeaves <= maxNLeaves, then recursion will stop and the tree will be improved (default and minimum = 4) improverIndexFunc: reference to a callable python object which will be called to supply the index of the tree improver to use (see section on servants below). During recursion this object will get calls with the signature treeImproverIndexFunc(subTree, driver = self, attName = 'treeImprovers', recursionDepth = self.recDepth) Returning None indicates that no tree improver is appropriate (recursion should continue), otherwise the returned value should be the index in the list of tree improvers If the function returns None (or no function is supplied) and recursion is stopped by maxRecursion or maxNLeaves, then treeImprover[defaultSubtreeImproverIndex] will be used for subtrees and the last element will be used for improving the full tree(default = None). Function must return None for the full tree, if you wish to suppress improving the full tree. see validIndexStopsRecursion. defaultSubtreeImproverIndex: index of subtree improver if improverIndexFunc does not supply one and recursion is stopped. validIndexStopsRecursion: Boolean. If improverIndexFunc returns an integer (instead of None), but maxRecursion or maxNLeaves do not stop recursion, the validIndexStopsRecursion setting will determine if recursion is stopped. Servants, if any of the following servants is unspecified, then the registry will be used to obtain a (default) implementation when needed. treeDecomposer: reference to object that implements leafSetDecompose. treeMerger: reference to super-tree implementation: mergeTrees method. treeRefiner: reference to an object that refines trees with polytomies to bifurcating trees (refineTree method) treeImprovers: a list of treeImprover object references (see improverIndexFunc). numTreeImprovers: length of treeImprovers list (list will be cropped or padded with None if treeImprovers and numTreeImprovers do not agree) servantConfiguration: string to string dictionary mapping a servant specifier ('treeDecomposer', 'treeMerger', 'treeRefiner', 'treeImprovers') to the text command to send them via the execute() method. Keys may be omitted if no configuration is needed. The 'treeImprovers' key should map to a list of commands. (default is {}) ''' def __init__(self, **kwargs): '''See RecIDCM class help for discussion of the keyword arguments.''' self.registryWrapper = kwargs.get('registry') SimpleServer.__init__(self, self.registryWrapper) self.treeImprovers = kwargs.get('treeImprovers') nTreeImprovers = kwargs.get('numTreeImprovers') nTreeImproversGiven = self.treeImprovers is not None and len(self.treeImprovers) or 0 if nTreeImproversGiven == 0: if nTreeImprovers is None or nTreeImprovers < 1: self.treeImprovers = [None, None] if kwargs.get('servantConfiguration') is None: kwargs['servantConfiguration'] = { 'treeImprovers': ['Default HS swap=TBR nomultrees;', 'Default HS swap=NNI multrees=no;']} else: self.treeImprovers = [None] * nTreeImprovers elif nTreeImprovers is not None and nTreeImprovers > 0: if nTreeImprovers > nTreeImproversGiven: self.treeImprovers.extend([None] * (nTreeImprovers - nTreeImproversGiven)) elif nTreeImprovers < nTreeImproversGiven: del self.treeImprovers[nTreeImprovers:] self.treeImproversRegEntry = kwargs.get('treeImproverRegEntries', []) def_reg_entry = composeRegistryQuery(interface='TreeImprove', applicationName='org.cipres.paup-wrap') while len(self.treeImproversRegEntry) < len(self.treeImprovers): self.treeImproversRegEntry.append(def_reg_entry) for n in range(len(self.treeImprovers)): if (self.treeImprovers[n] is None) and (self.treeImproversRegEntry[n] is None): self.treeImproversRegEntry[n] = def_reg_entry self.treeDecomposeObjRef = kwargs.get('treeDecomposer') self.treeMergeObjRef = kwargs.get('treeMerger') self.treeRefineObjRef = kwargs.get('treeRefiner') self.maxIter = int(kwargs.get('maxIter', 1)) # ! self.maxRecursion = int(kwargs.get('maxRecursion', 1)) self.maxNLeaves = int(kwargs.get('maxNLeaves', 4)) if self.maxNLeaves < 4: self.maxNLeaves = 4 self.treeImproverIndexFunc = kwargs.get('improverIndexFunc') self.validIndexStopsRecursion = kwargs.get('validIndexStopsRecursion', False) self.continueIterating = kwargs.get('continueIteratingFunc') self.defaultSubTreeImproverIndex = int(kwargs.get('defaultSubtreeImproverIndex', 0)) self.storeIntermediates = bool(kwargs.get('storeIntermediates', False)) self.intermediates = [] self.servantConfiguration = kwargs.get('servantConfiguration', {}) self.loggingIndentString = '%80s' % ' ' def setServantConfiguration(self, cmdDict): self.servantConfiguration = cmdDict def _useServantToImproveTree(self, servant, subTree): '''Uses self.treeImproverObjRef to improve a tree. Assumes that self.treeImproverObjRef.setMatrix has been called.''' servant.setTree(subTree) return servant.improveTree(CORBA.Object._nil) def _dcmFixedMatrix(self, subTree): loggingIndent = self.loggingIndentString[:1 + self.recDepth] recurseDepthExceeeded = self.recDepth >= self.maxRecursion subTreeNLeaves = subTree.m_nLeaves _LOG.debug('subTreeNLeaves=%d' % subTreeNLeaves) _LOG.debug('%s_dcmFixedMatrix %s\n%ssubTreeNLeaves = %d\n' % (loggingIndent, subTree.m_newick, loggingIndent, subTreeNLeaves)) treeImproverIndex = None if self.treeImproverIndexFunc is not None: mnl = subTree.m_nLeaves subTree = CipresTree(subTree) subTree.m_nLeaves = mnl treeImproverIndex = self.treeImproverIndexFunc(subTree, driver = self, attName = 'treeImprovers', recursionDepth = self.recDepth) if (self.validIndexStopsRecursion and treeImproverIndex is not None) or recurseDepthExceeeded or subTreeNLeaves <= self.maxNLeaves: if treeImproverIndex is None: treeImproverIndex = self.defaultSubTreeImproverIndex _LOG.debug('%sSmall enough. call tree improver %d (out of %d)...\n' % (loggingIndent, treeImproverIndex, len(self.treeImprovers))) t = self._useServantToImproveTree(self.treeImprovers[treeImproverIndex], subTree) _LOG.debug('%s returned self.treeImproverObjRef.improveTree = %s\n' % (loggingIndent, t.m_newick)) return t self.recDepth += 1 _LOG.debug('%sToo big. decompose...\n' % loggingIndent) import pdb try: leafSets = self.treeDecomposeObjRef.leafSetDecompose(subTree) except CipresIDL_api1.TreeParseException: sys.exit('Tree Parse Exception on the tree %s' % str(subTree.m_newick)) _LOG.debug('%sleaf sets: \n%s %s\n\n' % (loggingIndent, loggingIndent, ('\n'+loggingIndent + ' ').join([str(i) for i in leafSets]))) smallerTreesList = [] for lsNum, leafSet in enumerate(leafSets): prunedTree = getPrunedCopy(subTree, leafSet) prunedTree.m_nLeaves = len(leafSet) #assure _dcmFixedMatrix call has m_nLeaves member if self.storeIntermediates: self.intermediates.append(('%d.%d.%d.start' % (self.nIters, self.recDepth, lsNum), prunedTree.m_newick)) improvedPrunedTree = self._dcmFixedMatrix(prunedTree) smallerTreesList.append(improvedPrunedTree) if self.storeIntermediates: self.intermediates.append(('%d.%d.%d' % (self.nIters, self.recDepth, lsNum), improvedPrunedTree.m_newick)) _LOG.debug('%smerging: \n%s %s\n' % (loggingIndent, loggingIndent ,('\n'+loggingIndent + ' ').join([i.m_newick for i in smallerTreesList]))) tree = CipresTree(self.treeMergeObjRef.mergeTrees(smallerTreesList)) _LOG.debug('%smerge returned:%s\n\n' % (loggingIndent, tree.m_newick)) if self.storeIntermediates: self.intermediates.append(('%d.%d.merged' % (self.nIters, self.recDepth), tree.m_newick)) _LOG.debug('%srefining:%s\n' % (loggingIndent, str(tree.m_newick))) self.treeRefineObjRef.setTree(tree) refined = self.treeRefineObjRef.refineTree() _LOG.debug('%srefine returned:%s\n\n' % (loggingIndent, str(refined.m_newick))) refined.m_nLeaves = subTreeNLeaves #assure _dcmFixedMatrix call has m_nLeaves member self.recDepth -= 1 return CipresTree(refined) def _passMatrixToServants(self, matrix): for treeImp in self.treeImprovers: treeImp.setMatrix(matrix) self.treeRefineObjRef.setMatrix(matrix) def improveTree(self, tree, matrix): '''Uses recursive, iterative DCM3 to infer a tree. "tree" will be used as the starting tree, and the "matrix" will be the data used to evaluate trees. Currently all model-choice, scoring rules, etc must be specified by configuring the treeImprove servants.''' if not hasEdgeLengths(tree): raise ValueError, 'Starting tree must have edge lengths (or the decomposition will not work).' self.startingTree = tree self.intermediates = [] self._incarnateServants() if not self._configureServants(): raise AssertionError, 'Servant configuration failed' self.nIters = 0 self._passMatrixToServants(matrix) if not hasattr(tree, 'm_nLeaves'): tree.m_nLeaves = tree.getNLeaves() #assure _dcmFixedMatrix call has m_nLeaves member self.startingNLeaves = tree.m_nLeaves if self.storeIntermediates: self.intermediates.append(('%d.0.initial' % self.nIters, tree.m_newick)) while self.nIters < self.maxIter: if self.continueIterating is not None: mnl = tree.m_nLeaves tree = CipresTree(tree) tree.m_nLeaves= mnl if not self.continueIterating(tree, nIterations = self.nIters, driver = self): break _LOG.debug('not done, another loop\n') self.recDepth = 0 #temp tree = self._dcmFixedMatrix(tree) if self.storeIntermediates: self.intermediates.append(('%d.0.refined' % self.nIters, tree.m_newick)) _LOG.debug('recDCM returned %s\n' % tree.m_newick) #default is to use the last tree improver for the full tree treeImproverIndex = -1 if self.treeImproverIndexFunc is not None: tree = CipresTree(tree) treeImproverIndex = self.treeImproverIndexFunc(tree, driver = self, attName = 'treeImprovers', recursionDepth = self.recDepth) if treeImproverIndex is not None: tree = self._useServantToImproveTree(self.treeImprovers[treeImproverIndex], tree) if self.storeIntermediates: self.intermediates.append(('%d.0.full' % self.nIters, tree.m_newick)) _LOG.debug('full tree search (using treeImprovers[%d] out of %d) returned %s\n' % (treeImproverIndex, len(self.treeImprovers), tree.m_newick)) tree.m_nLeaves = self.startingNLeaves #assure _dcmFixedMatrix call has m_nLeaves member self.nIters += 1 return tree def _passConfigToServant(self, servant, cmd, cmdKey): _LOG.debug('%s execute "%s"' % (cmdKey, cmd)) result = servant.execute(cmd) ready = True if not result[0]: ready = False _LOG.error(result[1]) elif len(result[1]) > 0: _LOG.info('%s configuration output: %s' %(cmdKey, result[1])) return ready def _configureServants(self): assert(self.treeImprovers[0] is not None) cfgDict = self.servantConfiguration ready = True for cmdKey, servant in [ ('treeRefiner', self.treeRefineObjRef), ('treeDecomposer', self.treeDecomposeObjRef), ('treeMerger', self.treeMergeObjRef)]: cmd = cfgDict.get(cmdKey) if cmd and (servant is not None): ready = self._passConfigToServant(servant, cmd, cmdKey) and ready cmds = cfgDict.get('treeImprovers', []) for n, cmdAndServantcmdServant in enumerate(zip(cmds, self.treeImprovers)): cmd, cmdServant = cmdAndServantcmdServant[0], cmdAndServantcmdServant[1] if cmd and (cmdServant is not None): ready = self._passConfigToServant(cmdServant, cmd, 'treeImprove[%d]' % n) and ready return ready def _incarnateServants(self): if self.treeRefineObjRef is not None and self.treeDecomposeObjRef is not None and self.treeMergeObjRef is not None: allInstantiated = True for ti in self.treeImprovers: if ti is None: allInstantiated = False break if allInstantiated: return if self.registryWrapper is None: self.registryWrapper = getCipresRegistry() if self.registryWrapper is None: raise ValueError, 'Registry interface is invalid. Cannot create servant objects.' registryWrapper = self.registryWrapper for n, ti in enumerate(self.treeImprovers): if ti is None: self.treeImprovers[n] = registryWrapper.getServiceOrThrow(CipresIDL_api1.TreeImprove, registryEntry=self.treeImproversRegEntry[n]) if not self.treeRefineObjRef: self.treeRefineObjRef = registryWrapper.getServiceOrThrow(CipresIDL_api1.TreeRefine) if not self.treeDecomposeObjRef: self.treeDecomposeObjRef = registryWrapper.getServiceOrThrow(CipresIDL_api1.TreeDecompose) if not self.treeMergeObjRef: self.treeMergeObjRef = registryWrapper.getServiceOrThrow(CipresIDL_api1.TreeMerge) def removeServants(self): _LOG.debug('RecIDCM removeServants()') for o in self.treeImprovers + [self.treeRefineObjRef, self.treeDecomposeObjRef, self.treeMergeObjRef]: try: if o is not None: _LOG.debug('calling remove on servant') o.remove() except: pass self.treeImprovers = [None] * len(self.treeImprovers) self.treeRefineObjRef = None self.treeDecomposeObjRef = None self.treeMergeObjRef = None def __del__(self): self.removeServants() _LOG = cipresGetLogger('pipres.service_impl.client.tree_improve') def improveTreeRecIDCM(tree, matrix, **kwargs): '''Uses RecIDCM to try to improve the "tree" argument. The data should be sent in "matrix" (CipresDiscreteMatrix or CipresIDL_api1.DataMatrix) See help on RecIDCM for a description of the keyword arguments. Returns CipresIDL_api1.Tree object (or if store intermediates is selected, the tree and list of strings describing the intermediate trees).''' _LOG.debug('creating RecIDCM object') dcm = RecIDCM(**kwargs) try: _LOG.debug('calling dcm.improveTree') t = dcm.improveTree(tree, matrix) finally: dcm.removeServants() if kwargs.get('storeIntermediates', False): return t, dcm.intermediates return t RecIDCM.ui_xml = ''' JPanel Integer true 1 The number of iterations of recursive DCM to perform (see also continueIteratingFunc) maxIter = $maxIter $maxIter greater_than 0 isGreaterThanZero Maxtrees must be greater than 0 Python Method false None A function with the signture fxn(subtree, numIters, **kwargs) may be specified. This function should return False to stop iteration. continueIterating = $continueIterating Integer true 20 Decomposition will continue until the tree has fewer than maxNLeaves (or maxRecursion is hit, see also improverIndexFunc) maxNLeaves = $maxNLeaves $maxNLeaves greater_than 0 isGreaterThanZero maxNLeaves must be greater than 0 (any value under 5 mean that maxNLeaves never stops the recursion). Integer true 5 This setting limits the number of recursive decompositions. maxRecursion = $maxRecursion $maxRecursion greater_than -1 isGreaterThanZero MaxRecursionLevels must be 0 or greater. Integer true 1 numTreeImprovers = $numTreeImprovers $MaxRecursionLevels greater_than 0 isGreaterThanZero At least one tree improver must be used. List true treeImprovers = [$treeImprovers] List of tree improver objects used during search. Standard order is the small tree improvers first, followed by the improver to use on larger trees. Interface TreeImprove true $treeImproverObj len($treeImprovers) equals 0 hasRightNumElements numTreeImprovers must match the length of treeImprovers Python Method false None A function with the signture fxn(subtree, **kwargs) may be specified. This function will be called to decide which tree improver to use. treeImproverIndexFunc = $treeImproverIndexFunc Boolean true validIndexStopsRecursion=$validIndexStopsRecursion True False Integer true 0 Index in the treeImprovers list of the treeImprover to use when recursion is halted by maxRecursion or maxNLeaves (not by improverIndexFunc) defaultSubtreeImproverIndex = $defaultSubtreeImproverIndex $maxRecursion greater_than -1 isGreaterThanZero MaxRecursionLevels must be 0 or greater. Interface TreeDecompose true Module used to break the tree into subsets of leaves $treeDecomposer Interface TreeMerge true Supertree module used after subtrees searches have occurred $treeMerger Interface TreeRefine true Module resolve polytomies in the merged trees $treeRefiner Boolean false storeIntermediates=$storeIntermediates True False ''' if __name__ == '__main__': launchedAsServer = hasServerArgs(sys.argv) try: if launchedAsServer: cipresServe(sys.argv, {'TreeImprove' : RecIDCM}, factoryRegistryKeyword = 'registry') else: registryWrapper, mat, startingTree = cipresClientInit(sys.argv, returnMatrix = True, returnTree = True) dcm = RecIDCM(registry = registryWrapper, maxIter = 2, maxNLeaves = 20, maxRecursion = 3) try: _LOG.debug('launchedAsServer = ' + str(launchedAsServer)) _LOG.debug('creating RecIDCM object') if not mat: sys.exit('must specify a NEXUS file with a data matrix using -filename arguments') if not startingTree: sys.exit('must specify a NEXUS file with a tree using -filename arguments') _LOG.debug('calling dcm.improveTree') t = dcm.improveTree(startingTree, mat) _LOG.debug('Tree returned =%s\n\nRemoving servants.\n' % (t.m_newick)) print 'returned: %s\n score = %s' % (t.m_newick, str(t.m_score)) finally: dcm.removeServants() except: logException(_LOG)