#!/usr/bin/python """Currently implements tree searching via a file-based wrapper around GARLIv0.951""" import os, sys import re from PIPRes.util.server_import import * from PIPRes.wrap.external_program import * #from PIPRes.cipres_types import * from PIPRes.util.cipres import getCipresProperty from PIPRes.util.io import cipresGetLogger _LOG = cipresGetLogger('garli_wrap.py') from cStringIO import StringIO _garliRMatPat =re.compile(r'.*[rR]\s*([^rfapiRFAPI]+)\s*([rfapiRFAPI].*)') _garliFreqPat = re.compile(r'.*[fF]\s*([^rfapiRFAPI]+)\s*([rfapiRFAPI].*)') _garliShapePat = re.compile(r'.*[aA]\s*([.0-9]+)\s*([rfapiRFAPI].*)') _garliPInvarPat = re.compile(r'.*[iIpP]\s*([.0-9]+)\s*([rfapiRFAPI].*)') class GarliModel: def __init__(self, garliFmt=None, **kwargs): """A nucleotide model description. valid kwargs: rMat, freqs, gammaShape, pInvar, garliFmt""" self.nStates = 4 if garliFmt: #ASK DZ m = _garliRMatPat.match(garliFmt) rMat = m and m.group(1).strip().split() or None m = _garliFreqPat.match(garliFmt) freqs = m and m.group(1).strip().split() or None m = _garliShapePat.match(garliFmt) self.gammaShape = not m and 0.5 or float(m.group(1)) m = _garliPInvarPat.match(garliFmt) self.pInvar = m and float(m.group(1)) or 0.0 else: rMat = kwargs.get('rMat') freqs = kwargs.get('freqs') self.gammaShape = kwargs.get('gammaShape', 0.5) self.pInvar = kwargs.get('pInvar', 0.0) rMat = rMat or [1.0, 5.0, 1.0, 1.0, 5.0] if len(rMat) == 6: gtRate = rMat[5] self.relRates = [i/gtRate for i in rates[:5]] else: assert(len(rMat) == 5) self.relRates = rMat freqs = freqs or [.25, .25, .25] notLastFreq = sum(freqs[:self.nStates - 1]) assert(len(freqs) > self.nStates - 2) self.freqs = freqs if len(freqs) == self.nStates: sf = notLastFreq + freqs[self.nStates - 1] assert(approxEqual(sf, 1.0)) else: assert(len(freqs) == self.nStates - 1) self.freqs.append(1.0 - notLastFreq) for f in self.freqs: assert(f >= 0.0) for r in self.relRates: assert(r >= 0.0) assert(self.pInvar >= 0.0 and self.pInvar <= 1.0) assert(self.gammaShape > 0) def __str__(self): return 'r %s b %s a %f p %f ' % ( ' '.join(map(str, self.relRates)), ' '.join(map(str, self.freqs)), self.gammaShape, self.pInvar, ) class GarliWrap(object, CipresIDL_api1__POA.TreeImprove, CipresIDL_api1__POA.TreeInfer, SimpleExternalWrapperServer): general_opts_and_d = [ ("datafname", 'inf'), ("constraintfile", "none"), #no ui ("streefname", 'random'), ("ofprefix", 'outf'), #no ui ("randseed", -1), ("availablememory", 512), ("logevery", 10), ("saveevery", 100), ("refinestart", 1), ("outputeachbettertopology", 1), ("enforcetermconditions", 1), ("genthreshfortopoterm", 10000), ("scorethreshforterm", 0.05), ("significanttopochange", 0.01), ("outputphyliptree", 0), #no ui ("outputmostlyuselessfiles", 0), #no ui ("writecheckpoints", 0), #no ui ("restart", 0), #no ui ("ratematrix", "6rate"), ("statefrequencies", "estimate"), ("ratehetmodel", "gamma"), ("numratecats", 4), ("invariantsites", "estimate"), ] master_opts_and_d = [ ("nindivs", 4), ("holdover", 1), ("selectionintensity", .5), ("holdoverpenalty", 0), ("stopgen", 5000000), ("stoptime", 5000000), ("startoptprec", 0.5), ("minoptprec", 0.01), ("numberofprecreductions", 20), ("treerejectionthreshold", 50.0), ("topoweight", 1.0), ("modweight", 0.05), ("brlenweight", 0.2), ("randnniweight", 0.1), ("randsprweight", 0.3), ("limsprweight", 0.6), ("intervallength", 100), ("intervalstostore", 5), ("limsprrange", 6), ("meanbrlenmuts", 5), ("gammashapebrlen", 1000), ("gammashapemodel", 1000), ("uniqueswapbias", 0.1), ("distanceswapbias", 1.0), ("bootstrapreps", 0), ("inferinternalstateprobs", 0), ] generalOpts = [i[0] for i in general_opts_and_d] masterOpts = [i[0] for i in master_opts_and_d] defaultConfig = dict(general_opts_and_d + master_opts_and_d) _defaultTag = 'outputtag' debugging = False def __init__(self, registry, garliPath): self.garliPath = garliPath self.debugging = GarliWrap.debugging SimpleExternalWrapperServer.__init__(self, registry) self.convertTreeFunc = self.orb is None and toTreeBridge or toIDLTree self.registry = registry self.mat = None self.tree = None self.userConfigCmd = {} self.config = dict(GarliWrap.defaultConfig) self.model = GarliModel() def setMatrix(self, mat): _LOG.debug('setMatrix called\n') self.mat = mat def parseCommandToDict(self, cmd): _LOG.debug('parsing %s' % cmd) cmdStream = cmd.split(';') d = {} for c in cmdStream: s = c.strip() if s and (not s.startswith('[')): spl = s.split('=') if len(spl) < 2: raise ValueError, 'Expecting = in GARLI configuration command. Found:\n%s\n' % c key = spl[0].strip().lower() val = '='.join(spl[1:]).strip() d[key] = val return d def execute(self, cmd): super(GarliWrap, self).execute(cmd) # call super so that self.userConfigCmdList is updated (for logging) try: newCmdDict = self.parseCommandToDict(cmd) self.userConfigCmd.update(newCmdDict) self.config.update(newCmdDict) except ValueError, x: return False, str(x) return True, '' def setTree(self, tree): _LOG.debug('setTree called. Converting arg to CipresTree and refining\n') self.tree = CipresTree(tree) # garli requires binary input trees: self.tree.randomRefine() _LOG.debug('setTree called with arg:\n') _LOG.debug(self.tree.m_newick) def improveTree(self, proxyConsumer): _LOG.debug('improveTree called') if not (self.mat and self.tree): raise RuntimeError, 'Failed to call setTree and setMatrix before improveTree' return self._garliSearch(self.tree) def inferTree(self, proxyConsumer): _LOG.debug('inferTree called') if not self.mat: raise RuntimeError, 'Failed to call setMatrix before inferTree' return self._garliSearch(None) def writeConfFile(self, file_obj): genOptsStr = '\n'.join(['%s = %s' % (i, str(self.config[i])) for i in GarliWrap.generalOpts]) masterOptsStr = '\n'.join(['%s = %s' % (i, str(self.config[i])) for i in GarliWrap.masterOpts]) file_obj.write("[general]\n%s\n[master]\n%s\n" % (genOptsStr, masterOptsStr)) def _garliSearch(self, startTree): # @todo not thread safe until we deal with the # fact that _createTempDataFile stores the tree in self.mappedTree # for now, we'll just make sure that calls _garliSearch are serialized assert(self.__dict__.get('mappedTree') is None) extInvContext = self._createTempDataFile( matrix=self.mat, tree=startTree, dirPref='garliWrap', matrixFormat=MatrixFormat.RelaxedPHYLIP, treeForTaxaOnly=True ) extInvContext.matrixFileObj.close() confFileName = 'garli.conf' outTag = self.config['ofprefix'] bestTreeFileName = outTag + '.best.tre' extraPaths = [bestTreeFileName, confFileName, extInvContext.matrixFilename, extInvContext.matrixFilename + '.comp'] if startTree is not None: extInvContext.treeFilename = 'start.tre' extInvContext.treeFileObj = open(os.path.join(extInvContext.tempDir, extInvContext.treeFilename), 'w') extInvContext.treeFileObj.write("%s %s;\n" % (str(self.model), self.mappedTree.getNewick())) extInvContext.treeFileObj.close() extraPaths.append(extInvContext.treeFilename) else: extInvContext.treeFilename = None numberedPaths = [outTag + '.log%.02d.log', outTag + '.treelog%02d.log'] try: cfg = None try: cfg = open(os.path.join(extInvContext.tempDir, confFileName), 'w') self.config['datafname'] = extInvContext.matrixFilename self.config['streefname'] = extInvContext.treeFilename is not None and extInvContext.treeFilename or 'random' self.writeConfFile(cfg) finally: if cfg is not None: cfg.close() _LOG.debug('Calling %s from %s' % (self.garliPath, extInvContext.tempDir)) self._callProcess(extInvContext.tempDir, [self.garliPath]) finalTreePath = os.path.join(extInvContext.tempDir, bestTreeFileName) score, modelString, treeString = parseGarliBestTreeFile(finalTreePath) treeToReturn = CipresTree(treeString) finally: self.mappedTree = None #@todo if not self.debugging: self.cleanupTemporaries(extInvContext.tempDir, extraPaths, numberedPaths) else: _LOG.debug('leaving the temporary directory %s (running in debug mode)' % extInvContext.tempDir) t = addScoreAndMapLeaves(treeToReturn, score, OptimalityCriterion.LIKELIHOOD, extInvContext.fromExternalTranslation) return self.convertTreeFunc(t) def createGarliWrap(registry, argv): return GarliWrap(registry, _getPathToPaupFromArgv([0] + argv)) # we add an empty element to act like the script name in a sys.argv list def defaultGarliWrap(): """Returns a MBTwoExpWrap object with a cipresDefaultRegistry, and path to mbTwoExp from the environment.""" return GarliWrap(cipresDefaultRegistry(), getCipresProperty('registryGarli')) _garliBestTree = re.compile(r'^tree\s+(\S+)\s*=\s*\[..\]\[([-0-9.]+)\]\[(.*)\]\s*(\S*);') def parseGarliBestTreeStream(stream): """Return (score, model, treeString) from a file like object that follows the format of a Garli *.best.tre file""" global _garliBestTree for line in stream: m = _garliBestTree.match(line) if m: g = m.groups() score = float(g[1]) model = g[2].strip() tree = g[3] return score, model, tree raise ValueError, 'did not find tree.' def parseGarliBestTreeFile(filepath): "Return (score, model, treeString) from a Garli *.best.tre file" inStream = open(filepath, 'rU') try: return parseGarliBestTreeStream(inStream) finally: inStream.close() return [i[1] for i in tsPairs] def _getPathToGarliFromArgv(argv): GarliWrap.debugging = '-debug' in argv if len(argv) < 2: _LOG.error(' '.join(argv)) raise ValueError, 'Expecting the path to Garli as an argument' gp = argv[1] if not os.path.exists(gp): raise ValueError, 'Expecting the path to Garli as an argument' return gp if __name__=='__main__': argv = sys.argv garliPath = _getPathToGarliFromArgv(argv) def GarliWrapFactory(registry, pathToGarli = garliPath): return GarliWrap(registry, pathToGarli) try: interfaceDict = {} for k in ['TreeEvaluate', 'TreeImprove', 'TreeInfer']: interfaceDict[k] = GarliWrapFactory cipresServe(argv, interfaceDict) except: logException(_LOG) ############################################################################### # What follows are the GARLI0.941 settings and defaults: ############################################################################### # # generalOpts = [ # ("datafname",'inf'), # ("streefname",'random'), # ("ofprefix",'outf'), # ("randseed",-1), # ("megsclamemory",500), # ("availablememory",512), # ("logevery",10), # ("saveevery",100), # ("refinestart",1), # ("outputeachbettertopology",1), # ("enforcetermconditions",1), # ("genthreshfortopoterm",2000), # ("scorethreshforterm",.05), # ("significanttopochange",0.05), # ("outputphyliptree",0), # ("outputmostlyuselessfiles",0), # ("dontinferproportioninvariant",0), # ] # # masterOpts = [ # ("nindivs",4), # ("holdover",1), # ("selectionintensity",.5), # ("holdoverpenalty",0), # ("stopgen",5000000), # ("stoptime",5000000), # ("startoptprec",.5), # ("minoptprec",.01), # ("numberofprecreductions",40), # ("topoweight",1.0), # ("modweight",.05), # ("brlenweight",.2), # ("randnniweight",.2), # ("randsprweight",.3), # ("limsprweight",.5), # ("intervallength",100), # ("intervalstostore",5), # ("limsprrange",6), # ("meanbrlenmuts",5), # ("gammashapebrlen",1000), # ("gammashapemodel",1000), # ("bootstrapreps",0), # ("inferinternalstateprobs",0), # ] # ############################################################################### # End GARLI0.941 settings ###############################################################################