#!/usr/bin/python '''Currently implements tree searching via a file-based wrapper around PAUP4.0b10''' import os from PIPRes.util.server_import import * from PIPRes.cipres_types import * from PIPRes.wrap.nexus_program_server import * from PIPRes.wrap.output_parser import * from PIPRes.service_impl.split_freq_supplier import SplitFreqSupplier from PIPRes.util.cipres import getCipresProperty from PIPRes.splits import splitToNewick from PIPRes.cipres_types import * from PIPRes.util.io import cipresGetLogger _LOG = cipresGetLogger('pipres.service_impl.paup_wrap.py') class ConstraintType: DONT_ENFORCE, MONOPHYLETIC, BACKBONE, CONVEX, NOT_MONOPHYLETIC, NOT_BACKBONE, NOT_CONVEX = range(7) toPaupType = ['', 'MONOPHYLY', 'BACKBONE', 'CONVEX', 'MONOPHYLY', 'BACKBONE, CONVEX'] def isConverse(t): return t >= ConstraintType.NOT_MONOPHYLETIC isConverse = staticmethod(isConverse) class PaupWrap(object, CipresIDL_api1__POA.TreeImprove, CipresIDL_api1__POA.TreeInfer, CipresIDL_api1__POA.TreeEvaluate, SimpleNexusWrapperServer): debugging = True def __init__(self, registry, paupPath): self.debugging = PaupWrap.debugging SimpleNexusWrapperServer.__init__(self, registry) self.convertTreeFunc = self.orb is None and toTreeBridge or toIDLTree self.registry = registry self.mat = None self.tree = None self.nexusReader = None self.bootSearchDefault = 'Default HSearch nomultrees swap=spr ' self.bootDefTemplate = 'Default Boot nreps = %d cutoffpct=0 grpfreq=yes ' self.searchRNGSeeder = None self.bootRNGSeeder = None self.bootFreqFile = 'bootfreqs.log' self.nBootReps = 100 self.treeImproveDefault = 'Default HSearch nomultrees swap=spr ' self.treeInferDefault = 'Default HSearch nomultrees swap=spr start=stepwise ' self.treeEvaluateDefault = '' self.futurePaup = False # in future releases paup will support searching on partial leaf sets # if we aren't using future paup, we need to translate the taxon labels if not os.path.exists(paupPath): raise ValueError, 'The specified path to PAUP (%s) does not exist.' % paupPath self.paupPath = paupPath self.constraints = [] self.criterion = OptimalityCriterion.PARSIMONY self.model = None self.taxLabels = None self.taxManager = None def setCriterion(self, crit): if isinstance(crit, str): self._criterion = OptimalityCriterion.asStr.index(crit) else: OptimalityCriterion.asStr[crit] # raises IndexError if invalid self._criterion = crit def getCriterion(self): return self._criterion criterion = property(getCriterion, setCriterion) def getSearchSeedFragment(self): if self.searchRNGSeeder is None: return '' return ' rseed = %d ' % self.searchRNGSeeder.externalSeed() def getBootSeedFragment(self): if self.bootRNGSeeder is None: return '' return ' bseed = %d ' % self.bootRNGSeeder.externalSeed() def setConstraint(self, split, typeOfConstraint = ConstraintType.MONOPHYLETIC): if typeOfConstraint == ConstraintType.DONT_ENFORCE: if split is None: if self.constraints: self.constraints[0] = ConstraintType.DONT_ENFORCE return self.constraints = [typeOfConstraint, None] newick = None if isinstance(split, long) or isinstance(split, int): if self.taxLabels is not None: newick = splitToNewick(split, taxLabels=self.taxLabels) else: assert(self.mat is not None) newick = splitToNewick(split, nTaxa=len(self.mat), starting=1) elif isinstance(split, str): newick = split.strip() if newick.startswith('.') or newick.startswith('*'): newick = splitRepToSplit(newick) else: if not isinstance(split, Tree): raise TypeError, 'split must be long, newick string, or tree' if self.taxManager is not None: self.constraints[1] = CipresTree(newick, taxaManager=self.taxManager) else: self.constraints[1] = numberedLeafTree(newick) def setSearchSeed(self, seed): if self.searchRNGSeeder is None: self.searchRNGSeeder = RNGSeeder() self.searchRNGSeeder.seed(seed) def setBootSeed(self, seed): if self.bootRNGSeeder is None: self.bootRNGSeeder = RNGSeeder() self.bootRNGSeeder.seed(seed) def getBootDefaults(self): bootCmd = self.bootDefTemplate % self.nBootReps return bootCmd + self.getBootSeedFragment() def getBootSearchDefaults(self): return self.bootSearchDefault + self.getSearchSeedFragment() def touch(self): self.splitFreq = {} def setMatrix(self, mat): _LOG.debug('setMatrix called\n') # _LOG.debug('matrix = %s\n' % mat) self.mat = mat self.touch() def setTree(self, tree): _LOG.debug('setTree called. Converting arg to CipresTree\n') self.tree = CipresTree(tree) _LOG.debug('setTree called with arg:\n') _LOG.debug(self.tree.m_newick) self.touch() def _calcSplitsIfEmpty(self): if len(self.splitFreq) == 0: if not self.mat: raise RuntimeError, 'Failed to call setMatrix before requesting split frequency' self.bootstrapToGetSplits() def getSplitsTable(self): self._calcSplitsIfEmpty() return self.splitFreq def readPScores(self, scoreFile): pscores = readPaupPScores(scoreFile) _LOG.debug('pscores read from %s' % scoreFile) return pscores def readLScores(self, scoreFile, model, longFmt = True, demandHeader = True): lscores = readPaupLScores(scoreFile, model, demandHeader=demandHeader, longFmt=longFmt) _LOG.debug('lscores read from %s' % scoreFile) return lscores def readDScores(self, scoreFile): dscores = readPaupDScores(scoreFile) _LOG.debug('dscores read from %s' % scoreFile) return dscores def readTrees(self, treeFile): try: self.nexusReaderLock.acquire() except: sys.exit('YIKES') try: if self.exiting: return nilTree() if self.nexusReader is None: _LOG.debug('getting ReadNexus object ref') self.nexusReader = self.registry.getService(CipresIDL_api1.ReadNexus) if self.nexusReader is None: raise RuntimeError, 'Could not acquire a ReadNexus servant' try: self.nexusReader.readNexusFile(treeFile, CipresIDL_api1.ReadNexus.NEXUS_TREES_BLOCK_BIT) return self.nexusReader.getTrees(0) finally: #TEMP pass#nr, self.nexusReader = self.nexusReader, None #nr.remove() finally: self.nexusReaderLock.release() def _paupHSearch(self, startingTree = None, configCmd = ''): return self._paupAnalysis(startingTree, configCmd, searchCmd = 'HSearch') def _embedInPaupBlock(self, s): 'accepts a string or list of strings (each of which is a command). adds paup block and semicolons.''' b = [ 'BEGIN PAUP', 'log start file=data.log append', ] if isinstance(s, str): b.append(s) else: b.extend(s) b.extend([ 'log stop', 'quit', 'END;']) return ';\t\n'.join(b) def _getScoreCmdPrefix(self, criterion): if criterion == OptimalityCriterion.PARSIMONY: return 'Pscore /' if criterion == OptimalityCriterion.LIKELIHOOD: return 'Lscore /LongFmt = yes ' return 'dscore /' def getInitializationCmd(self): '''Returns a command to be placed at the beginning of a paup block to set up the env. Currently this just sets the optimality criterion.''' settings = 'Set criterion = %s noerrorbeep nokeybeep noquerybeep ; ' % OptimalityCriterion.asStr[self.criterion] constraints = self.getConstraintsCmds() if constraints: return '%s;%s' % (settings, constraints) return settings def getScoreCmd(self, filename): prefix = self._getScoreCmdPrefix(self.criterion) suffix = filename and ("scorefile = \'%s\' " % filename ) or '' return '%s %s' % (prefix, suffix) def _paupAnalysis(self, startingTree, configCmd, searchCmd = 'HSearch'): startTreeCmd = (searchCmd.upper() == 'HSEARCH') and (startingTree is not None) and 'start = 1' or '' extInvContext = self._createTempDataFile( matrix=self.mat, tree=startingTree, supportsTaxaSubsets=self.futurePaup, dirPref='paupWrap') extraPaths = [os.path.splitext(extInvContext.filename)[0] + os.path.extsep + 'log'] try: scoreFile, treeFile = ('score', 'tree') cmdList = [ self.getInitializationCmd(), configCmd, searchCmd + ' ' + startTreeCmd, 'sorttrees', self.getScoreCmd(scoreFile), "savetrees from =1 to=1 file = \'%s\' TaxaBlk BrLens = yes" % treeFile ] extInvContext.fileObj.write(self._embedInPaupBlock(cmdList)) extInvContext.fileObj.close() self._callProcess(extInvContext.tempDir, [self.paupPath, '-n', str(extInvContext.filename)]) # commandline or extInvContext.filename limit in paup means that we have to chdir and use relative paths scoreFile = os.path.join(extInvContext.tempDir, scoreFile) treeFile = os.path.join(extInvContext.tempDir, treeFile) extraPaths.append(scoreFile) extraPaths.append(treeFile) if self.criterion == OptimalityCriterion.PARSIMONY: scores = self.readPScores(scoreFile) elif self.criterion == OptimalityCriterion.LIKELIHOOD: # readLScores return [[lnL, model], [lnL, model]...] # here we pull out just the score. scores = [i[0] for i in self.readLScores(scoreFile, self.model, longFmt = True, demandHeader = True)] else: scores = self.readDScores(scoreFile) treeToReturn = self.readTrees(treeFile)[0] finally: if not self.debugging: self.cleanupTemporaries(extInvContext.tempDir, [extInvContext.filename] + extraPaths) else: _LOG.debug('leaving the temporary directory %s (running in debug mode)' % extInvContext.tempDir) extTrans = not self.futurePaup and extInvContext.fromExternalTranslation or None t = addScoreAndMapLeaves(treeToReturn, scores[0], self.criterion, extTrans) return self.convertTreeFunc(t) def evaluateTree(self, proxyConsumer): try: _LOG.debug('evaluateTree called') if not (self.mat and self.tree): raise RuntimeError, 'Failed to call setTree and setMatrix before evaluateTree' return self._paupAnalysis(self.tree, self.treeEvaluateDefault + ' ; ' + self.userCommands(), searchCmd = '') except: logException(_LOG) return nilTree() def improveTree(self, proxyConsumer): try: _LOG.debug('improveTree called') if not (self.mat and self.tree): raise RuntimeError, 'Failed to call setTree and setMatrix before improveTree' return self._paupHSearch(self.tree, self.treeImproveDefault + ' ; ' + self.userCommands()) except: logException(_LOG) return nilTree() def inferTree(self, proxyConsumer): try: _LOG.debug('inferTree called') if not self.mat: raise RuntimeError, 'Failed to call setMatrix before inferTree' return self._paupHSearch(None, self.treeInferDefault + ' ; ' + self.userCommands()) except: logException(_LOG) return nilTree() def bootstrapToGetSplits(self): extInvContext = self._createTempDataFile( matrix=self.mat, tree=None, supportsTaxaSubsets=self.futurePaup, dirPref='paupWrap') extraPaths = [os.path.splitext(extInvContext.filename)[0] + os.path.extsep + 'log', self.bootFreqFile] try: cmdList = [ self.getInitializationCmd(), self.getBootSearchDefaults(), 'log stop', 'log file=%s start replace' % self.bootFreqFile, self.getBootDefaults(), self.userCommands(), 'boot', 'log stop', 'log file = data.log append start', ] extInvContext.fileObj.write(self._embedInPaupBlock(cmdList)) extInvContext.fileObj.close() self._callProcess(extInvContext.tempDir, [self.paupPath, '-n', str(extInvContext.filename)]) # commandline or extInvContext.filename limit in paup means that we have to chdir and use relative paths splitsFile = os.path.join(extInvContext.tempDir, self.bootFreqFile) try: self.splitFreq = {} self.splitFreq = readPAUPLogForSplitFreqs(splitsFile) except SplitsNotFoundError: pass finally: if not self.debugging: self.cleanupTemporaries(extInvContext.tempDir, [extInvContext.filename] + extraPaths) else: _LOG.debug('leaving the temporary directory %s (running in debug mode)' % extInvContext.tempDir) def enforceConstraint(self, constraint, demandSplits = True): '''Adds a split of tree to the constaints. if "demandSplits" is false, then a converse constraint is used. Only affects searching (not tree evaluation)''' self.constraints = [] def getConstraintsCmds(self): if (not self.constraints) or self.constraints[0] == ConstraintType.DONT_ENFORCE: return '' t = self.constraints[0] conCmd = 'constraints theConstraint (%s) = %s' % (ConstraintType.toPaupType[t], self.constraints[1].getNewick()) searchCmd = 'default hsearch enforce constraints=theConstraint converse=%s' % (ConstraintType.isConverse(t) and 'yes' or 'no') return '%s;%s' %(conCmd, searchCmd) def _getPathToPaupFromArgv(argv): PaupWrap.debugging = '-debug' in argv if len(argv) < 2: _LOG.error(' '.join(argv)) raise ValueError, 'Expecting the path to Paup as an argument' return argv[1] def createPaupWrap(registry, argv): return PaupWrap(registry, _getPathToPaupFromArgv([0] + argv)) # we add an empty element to act like the script name in a sys.argv list def defaultPaupWrap(): '''Returns a MBTwoExpWrap object with a cipresDefaultRegistry, and path to mbTwoExp from the environment.''' return PaupWrap(cipresDefaultRegistry(), getCipresProperty('paupPath')) if __name__=='__main__': argv = sys.argv paupPath = _getPathToPaupFromArgv(argv) def PaupWrapFactory(registry, pathToPaup = paupPath): return PaupWrap(registry, pathToPaup) try: interfaceDict = {} for k in ['TreeEvaluate', 'TreeImprove', 'TreeInfer']: interfaceDict[k] = PaupWrapFactory cipresServe(argv, interfaceDict) except: logException(_LOG)