#!/usr/bin/python '''Currently implements tree searching via a file-based wrapper around PAUP4.0b10 Ratchet code follows the strategy of PAUPRat (Sikes and Lewis) impl of Nixon (1999) "Parsimony Ratchet" search technique ''' import os import re import time from itertools import izip from threading import Event, Thread, Lock from PIPRes.util.server_import import * from PIPRes.cipres_types import * from PIPRes.wrap.nexus_program_server import * from PIPRes.wrap.output_parser import * from PIPRes.service_impl.split_freq_supplier import SplitFreqSupplier from PIPRes.util.cipres import getCipresProperty from PIPRes.splits import splitToNewick from PIPRes.cipres_types import * from PIPRes.util.io import cipresGetLogger from PIPRes.event_consumer import EventSupplier, LineReadingThread, FileWatchingThread _LOG = cipresGetLogger('pipres.service_impl.paup_wrap.py') class ConstraintType: DONT_ENFORCE, MONOPHYLETIC, BACKBONE, CONVEX, NOT_MONOPHYLETIC, NOT_BACKBONE, NOT_CONVEX = range(7) toPaupType = ['', 'MONOPHYLY', 'BACKBONE', 'CONVEX', 'MONOPHYLY', 'BACKBONE, CONVEX'] def isConverse(t): return t >= ConstraintType.NOT_MONOPHYLETIC isConverse = staticmethod(isConverse) class WrapperCommandError(ValueError): def __init__(self, m): ValueError.__init__(self, m) self.msg = m class PaupDStatusReadingThread(LineReadingThread): """A thread that will read the input stream and call an function with the tree strings as they appear. `toCall` passing it (tree, score, model_str) as they appear when run is is called. The thread terminates if: - "end;.*" is encountered as a line. - the stop Event is set """ # header and status line for searches that involve one replicate one_rep_stat_line_re = re.compile(r"\s*(\d+:\d+:\d+)\s+([-0-9]+)\s+([-0-9]+)\s+([-0-9]+)\s+([-0-9]+)\s+([ -.0-9]+)\s*") one_rep_pre_re = re.compile(r"\s+time\s+added\s+tried\s+saved\s+left-to-swap\s+tree\(s\)") # header and status line for searches that involve multiple replicates multi_rep_stat_line_re = re.compile(r"\s*(\d+:\d+:\d+)\s+([-0-9]+)\s+([-0-9])+\s+([-0-9]+)\s+([-0-9]+)\s+([-0-9]+)\s+([ -.0-9]+)\s+([ -.0-9]+)\s*") multi_rep_pre_re = re.compile(r"\s+time\s+rep\s+added\s+tried\s+saved\s+left-to-swap\s+this\s+rep\s+overall\s*") # patterns for the start and end of a dstatus table starting_re = re.compile(r"\s*\-{10,}\s*") ending_re = re.compile(r".*completed\s*") def __init__(self, toCall, filename, stop_event=None, READ_SLEEP_INTERVAL=0.5): """`toCall` is the callable that takes a (raw) tree string, score and model and returns a False value to terminate the reading. see LineReadingThread for other args """ self.toCall = toCall if not callable(self.toCall): raise ValueError("toCall arg must be callable") self.started = False self.ready_to_start = False self.one_rep = False LineReadingThread.__init__(self, lineCallback=None, stream=None, filename=filename, stop_event=stop_event, READ_SLEEP_INTERVAL=READ_SLEEP_INTERVAL) def keep_going(self, line): if self.started: if self.one_rep: m = PaupDStatusReadingThread.one_rep_stat_line_re.match(line) if m: e_time, tax_add, rearr, n_tre, swap_left, score = m.groups() if rearr == "-" or rearr == "0": msg = "Constructing starting tree. Adding taxon %s." % tax_add else: msg = "Current score = %s. %s trees found. %s left to "\ "swap. %s rearrangements tried." % (score, n_tre, swap_left, rearr) msg = msg + " Elapsed time = %s" % e_time return self.toCall(msg) else: m = PaupDStatusReadingThread.multi_rep_stat_line_re.match(line) if m: e_time, rep, tax_add, rearr, n_tre, swap_left, rep_sc, score = m.groups() if score == "-": msg = "Constructing starting tree. Search replicate %s."\ " Adding taxon %s." % (rep, tax_add) else: msg = "Current score = %s. Search replicate %s. "\ "%s trees found. %s left to swap. "\ "%s Rearrangements tried." % (score, rep, n_tre, swap_left, rearr) msg = msg + " Elapsed time = %s" % e_time return self.toCall(msg) if PaupDStatusReadingThread.ending_re.match(line): self.started = False elif self.ready_to_start: if PaupDStatusReadingThread.starting_re.match(line): self.started = True else: self.ready_to_start = False elif PaupDStatusReadingThread.one_rep_pre_re.match(line): self.one_rep = True self.ready_to_start = True elif PaupDStatusReadingThread.multi_rep_pre_re.match(line): self.one_rep = False self.ready_to_start = True return True class PaupRatchetTreeReadingThread(FileWatchingThread): """A thread that will return trees from the ratchet iterations of a paup-ratchet run. """ def __init__(self, paup_wrapper, dir=os.curdir, stop_event=None, READ_SLEEP_INTERVAL=0.5): """`paup_wrapper` is the PaupWrap instance that is managing the paup run. see FileWatchingThread for other args """ self.dir = dir self.score_files = list(paup_wrapper._ratchet_score_files) self.tree_files = list(paup_wrapper._ratchet_tree_files) self.paup_wrapper = paup_wrapper self.sleep_interval = READ_SLEEP_INTERVAL FileWatchingThread.__init__(self, group=None, target=None, name=None, args=(), kwargs={}) def run(self): iteration = 0 for tree_fn, score_fn in izip(self.tree_files, self.score_files): tree_fp = os.path.join(self.dir, tree_fn) score_fp = os.path.join(self.dir, score_fn) score_list = None while not score_list: if not self.wait_for_file_to_appear(score_fp): return # we have to deal with exception silently and try again because the file may be incomplete try: score_list = self.paup_wrapper._readScores(score_fp) except: pass if not score_list: time.sleep(self.sleep_interval) tree_list = None while not tree_list: if not self.wait_for_file_to_appear(tree_fp): return # we have to deal with exception silently and try again because the file may be incomplete try: time.sleep(self.sleep_interval) tree_list = self.paup_wrapper.readTrees(tree_fp) except: pass if not tree_list: time.sleep(self.sleep_interval) for n, tree in enumerate(tree_list): tree.m_name = "PAUP_Ratchet_iter_%d_tree_%d" % (1 + iteration, n + 1) iteration += 1 self.paup_wrapper._supply_trees_as_events(tree_list, score_list) class PaupWrap(object, CipresIDL_api1__POA.TreeImprove, CipresIDL_api1__POA.TreeInfer, CipresIDL_api1__POA.TreeEvaluate, SimpleNexusWrapperServer): debugging = True wrapper_command_pattern = re.compile(r"\[paup\.wrap\.([^\]]+)\]", re.I) def __init__(self, registry, paupPath): self.debugging = PaupWrap.debugging SimpleNexusWrapperServer.__init__(self, registry) self.convertTreeFunc = self.orb is None and toTreeBridge or toIDLTree self.registry = registry self.mat = None self.tree = None self.nexusReader = None settings = "set MaxTrees=10 NoIncrease ;" self.bootSearchDefault = settings + ' Default HSearch nomultrees swap=spr ' self.bootDefTemplate = settings + ' Default Boot nreps = %d cutoffpct=0 grpfreq=yes ' self.searchRNGSeeder = None self.bootRNGSeeder = None self.bootFreqFile = 'bootfreqs.log' self.nBootReps = 100 self.treeImproveDefault = settings + 'Default HSearch nomultrees swap=spr ' self.treeImproveDefault = self.treeImproveDefault + ' dstatus=30 ' self.treeInferDefault = settings + 'Default HSearch nomultrees swap=spr start=stepwise addseq=random' self.treeInferDefault = self.treeInferDefault + ' dstatus=30 ' self.treeEvaluateDefault = '' self.futurePaup = False # in future releases paup will support searching on partial leaf sets # if we aren't using future paup, we need to translate the taxon labels self.n_ratchet_reps = 0 # the number of ratchet iterations to perform self.n_ratchet_chars = 0 # the number of characters to upweight in each ratchet run self.ratchet_prop = 0.2 # the proportion of characters to upweight in each ratchet run self.ratchet_seed = None self.ratchet_rng = random.Random() self._ratchet_score_files = [] self._ratchet_tree_files = [] if not os.path.exists(paupPath): raise ValueError, 'The specified path to PAUP (%s) does not exist.' % paupPath self.paupPath = paupPath self.constraints = [] self.criterion = OptimalityCriterion.PARSIMONY self.model = None self.taxLabels = None self.taxManager = None self._toCallerTranslator = None # should be locked by translationObjLock self.translationObjLock = Lock() # lock to assure single thread access to translation data structs self.statusEventLock = Lock() # lock to assure single thread access to the event_supplier that posts self.event_supplier = None # self._statusEventConsumer = None # consumer that the event_supplier communicates with self.tempTreesToEventsThread = None # thread for sending tree events self._termEvent = None # threading.Event object that is used by the EventsThreads to check if paup is still running. self.extInvContext = None # temporary: using extInvContext for debugging purposes in readTree only def _paupAnalysis(self, start_tree, configCmd, searchCmd = 'HSearch'): if (searchCmd.upper() == 'HSEARCH') and (start_tree is not None): searchCmd = "HSearch start = 1" extInvContext = self._createTempDataFile( matrix=self.mat, tree=start_tree, supportsTaxaSubsets=self.futurePaup, dirPref='paupWrap') self.toCallerTranslator = not self.futurePaup and extInvContext.fromExternalTranslation or None self.extInvContext = extInvContext # temporary: aliasing extInvContext for debugging purposes in readTree only extraPaths = [os.path.splitext(extInvContext.filename)[0] + os.path.extsep + 'log'] extraPaths.extend(self._ratchet_tree_files) extraPaths.extend(self._ratchet_score_files) termEvent = self.getTerminatedEvent() try: scoreFile, treeFile = ('score', 'tree') cmdList = [ self.getInitializationCmd(), configCmd, searchCmd, 'sorttrees', self.getScoreCmd(scoreFile), "savetrees file = \'%s\' TaxaBlk BrLens = yes" % treeFile ] extInvContext.fileObj.write(self._embedInPaupBlock(cmdList)) extInvContext.fileObj.close() if self.event_supplier is not None: if self.is_running_ratchet(): tree_th = PaupRatchetTreeReadingThread( self, dir=extInvContext.tempDir, stop_event=termEvent) else: log_file_path = os.path.join(extInvContext.tempDir, "data.log") tree_th = PaupDStatusReadingThread( toCall=self.notify_caller, filename=log_file_path, stop_event=termEvent) self.tempTreesToEventsThread = tree_th self.tempTreesToEventsThread.start() red_fn = "stdouterr" extraPaths.append(red_fn) out_redirect_fp = os.path.join(extInvContext.tempDir, red_fn) out_redirect = open(out_redirect_fp, "w") self._callProcess(extInvContext.tempDir, [self.paupPath, '-n', str(extInvContext.filename)], out_redirection=out_redirect) _LOG.debug("PAUP* finished") termEvent.set() # this tells the output reading threads to die. self._termEvent = None scoreFile = os.path.join(extInvContext.tempDir, scoreFile) treeFile = os.path.join(extInvContext.tempDir, treeFile) extraPaths.append(scoreFile) extraPaths.append(treeFile) if not os.path.exists(scoreFile): if self.event_supplier is not None: if os.path.exists(log_file_path): c = open(log_file_path, "rU").read() self.notify_caller("PAUP* LOG (with any error messages) follows:\n\n%s" % c) else: self.notify_caller("PAUP* failed to return a tree or leave an error log") return [] if self.is_running_ratchet(): score_list, treesToReturn = self._get_ratchet_results(extInvContext.tempDir) else: score_list = self._readScores(scoreFile) treesToReturn = self.readTrees(treeFile) return self._map_trees_to_clients_names(treesToReturn, score_list) finally: self.toCallerTranslator = None self.extInvContext = None # temporary: using extInvContext for debugging purposes in readTree only if not self.debugging: self.cleanupTemporaries(extInvContext.tempDir, [extInvContext.filename] + extraPaths) else: _LOG.debug('leaving the temporary directory %s (running in debug mode)' % extInvContext.tempDir) def setCriterion(self, crit): if isinstance(crit, str): self._criterion = OptimalityCriterion.asStr.index(crit) else: OptimalityCriterion.asStr[crit] # raises IndexError if invalid self._criterion = crit def getCriterion(self): return self._criterion criterion = property(getCriterion, setCriterion) def getSearchSeedFragment(self): if self.searchRNGSeeder is None: return '' return ' rseed = %d ' % self.searchRNGSeeder.externalSeed() def getBootSeedFragment(self): if self.bootRNGSeeder is None: return '' return ' bseed = %d ' % self.bootRNGSeeder.externalSeed() def setConstraint(self, split, typeOfConstraint = ConstraintType.MONOPHYLETIC): if typeOfConstraint == ConstraintType.DONT_ENFORCE: if split is None: if self.constraints: self.constraints[0] = ConstraintType.DONT_ENFORCE return self.constraints = [typeOfConstraint, None] newick = None if isinstance(split, long) or isinstance(split, int): if self.taxLabels is not None: newick = splitToNewick(split, taxLabels=self.taxLabels) else: assert(self.mat is not None) newick = splitToNewick(split, nTaxa=len(self.mat), starting=1) elif isinstance(split, str): newick = split.strip() if newick.startswith('.') or newick.startswith('*'): newick = splitRepToSplit(newick) else: if not isinstance(split, Tree): raise TypeError, 'split must be long, newick string, or tree' if self.taxManager is not None: self.constraints[1] = CipresTree(newick, taxaManager=self.taxManager) else: self.constraints[1] = numberedLeafTree(newick) def setSearchSeed(self, seed): if self.searchRNGSeeder is None: self.searchRNGSeeder = RNGSeeder() self.searchRNGSeeder.seed(seed) def setBootSeed(self, seed): if self.bootRNGSeeder is None: self.bootRNGSeeder = RNGSeeder() self.bootRNGSeeder.seed(seed) def getBootDefaults(self): bootCmd = self.bootDefTemplate % self.nBootReps return bootCmd + self.getBootSeedFragment() def getBootSearchDefaults(self): return self.bootSearchDefault + self.getSearchSeedFragment() def touch(self): self.splitFreq = {} def setMatrix(self, mat): _LOG.debug('setMatrix called\n') # _LOG.debug('matrix = %s\n' % mat) self.mat = mat self.touch() def setTree(self, tree): _LOG.debug('setTree called. Converting arg to CipresTree\n') self.tree = CipresTree(tree) _LOG.debug('setTree called with arg:\n') _LOG.debug(self.tree.m_newick) self.touch() def _calcSplitsIfEmpty(self): if len(self.splitFreq) == 0: if not self.mat: raise RuntimeError, 'Failed to call setMatrix before requesting split frequency' self.bootstrapToGetSplits() def getSplitsTable(self): self._calcSplitsIfEmpty() return self.splitFreq def readPScores(self, scoreFile): pscore_list = readPaupPScores(scoreFile) #_LOG.debug('pscores read from %s' % scoreFile) return pscore_list def readLScores(self, scoreFile, model, longFmt = True, demandHeader = True): lscore_list = readPaupLScores(scoreFile, model, demandHeader=demandHeader, longFmt=longFmt) #_LOG.debug('lscores read from %s' % scoreFile) return lscore_list def readDScores(self, scoreFile): dscore_list = readPaupDScores(scoreFile) #_LOG.debug('dscores read from %s' % scoreFile) return dscore_list def readTrees(self, treeFile): try: self.nexusReaderLock.acquire() except: sys.exit('YIKES') try: if self.exiting: return nilTree() if self.nexusReader is None: _LOG.debug('getting ReadNexus object ref') self.nexusReader = self.registry.getService(CipresIDL_api1.ReadNexus) if self.nexusReader is None: raise RuntimeError, 'Could not acquire a ReadNexus servant' try: self.nexusReader.readNexusFile(treeFile, CipresIDL_api1.ReadNexus.NEXUS_TREES_BLOCK_BIT) if self.extInvContext and self.extInvContext.treeTaxLabels: # the translation of leaves appears to assume that # all of the PAUP tree files will be written with the # same taxa ordering (we don't translate each tree file # from its order to the "external" ordering). # this block of code checks this assumption taxa = self.nexusReader.getTaxa(0) if taxa != self.extInvContext.treeTaxLabels: expected = "\n".join(self.extInvContext.treeTaxLabels) in_file = "\n".join(taxa) _LOG.warn("Expecting:\n%s\nGot:\n%s\n" % (expected, in_file)) assert(False) return self.nexusReader.getTrees(0) finally: #TEMP pass#nr, self.nexusReader = self.nexusReader, None #nr.remove() finally: self.nexusReaderLock.release() def _paupHSearch(self, start_tree = None, configCmd = ''): if self.n_ratchet_reps > 0: search_cmd = self.compose_ratchet_cmd(start_tree) else: search_cmd = 'HSearch' return self._paupAnalysis(start_tree, configCmd, searchCmd=search_cmd) def execute(self, cmd): try: self._read_wrapper_commands(cmd) except WrapperCommandError, x: return False, x.msg return SimpleServer.execute(self, cmd) def _read_wrapper_commands(self, cmd): pwcmds = PaupWrap.wrapper_command_pattern.findall(cmd) for elem in pwcmds: cmd_list = elem.lower().split("=") num_words = len(cmd_list) assert(num_words > 0) cmd = cmd_list[0].strip() if num_words == 1: val = None raise WrapperCommandError("Expecting = after %s" % cmd) else: joined = "=".join(cmd_list[1:]) val = joined.strip() if cmd == "n.ratchet.reps": if not val.isdigit(): raise WrapperCommandError("n.ratchet.reps must be a non-negative number. Found %s" % val) self.n_ratchet_reps = int(val) if self.n_ratchet_reps < 0: self.n_ratchet_reps = 0 elif cmd == "n.ratchet.chars": if not val.isdigit(): raise WrapperCommandError("n.ratchet.chars must be a non-negative number. Found %s" % val) self.n_ratchet_chars = int(val) if self.n_ratchet_chars < 0: self.n_ratchet_chars = 0 elif cmd == "ratchet.prop": try: self.ratchet_prop = float(val) except ValueError: raise WrapperCommandError("ratchet.prop must be a number. Found %s" % val) if self.ratchet_prop < 0.0: self.ratchet_prop = 0.0 elif cmd == "ratchet.seed": try: self.ratchet_seed = long(val) except ValueError: raise WrapperCommandError("ratchet.seed must be an integer. Found %s" % val) if self.ratchet_seed < 0: raise WrapperCommandError("ratchet.seed must be an integer. Found %s" % val) self.ratchet_rng.seed(self.ratchet_seed) else: raise WrapperCommandError("\"%s\" not recognized" % val) _LOG.debug("Processed %s=%s" % (cmd, val)) def get_save_ratchet_trees_cmd(self, iteration): """saves best tree and score to files and adds these file names to the self._ratchet_score_files and self._ratchet_tree_files lists. """ score_file = "ratchet_iter_%d_score.txt" % iteration tree_file = "ratchet_iter_%d.tre" % iteration self._ratchet_score_files.append(score_file) self._ratchet_tree_files.append(tree_file) return [ self.getScoreCmd(score_file), "savetrees file=%s taxablk BrLens = yes replace" % tree_file, #"gettrees file=all_ratchet.tre mode=7", #"savetrees file=all_ratchet.tre replace", "gettrees file=%s mode=3 warntree=no;" % tree_file ] def compose_ratchet_cmd(self, start_tree): first_search = start_tree is None and "HSearch" or "HSearch start=1" self._ratchet_score_files = [] self._ratchet_tree_files = [] n_ch = self.mat.m_numCharacters if self.n_ratchet_chars == 0: n_up = int(n_ch*self.ratchet_prop) else: n_up = self.n_ratchet_chars if (self.n_ratchet_reps < 1) or (n_up == 0) or (n_ch < 2): return first_search commands = [first_search, #"savetrees file=all_ratchet.tre taxablk BrLens=yes replace", ] assert n_up >0 orig_weights = [1] * n_ch # ASSUMING no input character weights ch_range = range(n_ch) commands.extend(self.get_save_ratchet_trees_cmd(0)) for i in xrange(self.n_ratchet_reps): wti = self._ratchet_weight_to_inds(n_ch, n_up) if wti: weights_args = [] for w, c_list in wti.iteritems(): weights_args.append("%d : %s " % (w, " ".join([str(cn + 1) for cn in c_list]))) commands.append("Weights 1 : 1 - .") commands.append("Weights %s" % ", ".join(weights_args)) commands.append("HSearch start=1") commands.append("Weights 1 : 1 - .") # restore original weights !!ASSUMING no input character weights!! commands.append("HSearch start=1") commands.extend(self.get_save_ratchet_trees_cmd(i+1)) #commands.append("gettrees file=all_ratchet.tre mode=7") return ';\n\t'.join(commands) def _ratchet_weight_to_inds(self, n_chars, n_up): last_ind = n_chars - 1 up_weighted = {} for j in xrange(n_up): k = self.ratchet_rng.randint(0, last_ind) up_weighted[k] = up_weighted.get(k, 1) + 1 wgt_to_ind_list = {} for index, wgt in up_weighted.iteritems(): inds = wgt_to_ind_list.get(wgt,[]) inds.append(index) wgt_to_ind_list[wgt] = inds return wgt_to_ind_list def _embedInPaupBlock(self, s): """accepts a string or list of strings (each of which is a command). Adds paup block and semicolons. """ b = [ 'BEGIN PAUP', 'log start file=data.log append', ] if isinstance(s, str): b.append(s) else: b.extend(s) b.extend([ 'log stop', 'quit', 'END;']) return ';\n\t'.join(b) def _getScoreCmdPrefix(self, criterion): if criterion == OptimalityCriterion.PARSIMONY: return 'Pscore /' if criterion == OptimalityCriterion.LIKELIHOOD: return 'Lscore /LongFmt = yes ' return 'dscore /' def getInitializationCmd(self): '''Returns a command to be placed at the beginning of a paup block to set up the env. Currently this just sets the optimality criterion.''' settings = 'Set criterion = %s noerrorbeep nokeybeep noquerybeep ; ' % OptimalityCriterion.asStr[self.criterion] constraints = self.getConstraintsCmds() if constraints: return '%s;%s' % (settings, constraints) return settings def getScoreCmd(self, filename): prefix = self._getScoreCmdPrefix(self.criterion) suffix = filename and ("scorefile = \'%s\' " % filename ) or '' return '%s %s' % (prefix, suffix) def getTerminatedEvent(self): if self._termEvent is None: self._termEvent = Event() return self._termEvent def setStatusEventConsumer(self, proxyConsumer): """Sets the new event consumer and returns the old one. In current usage within CIPRES this will return None, but the caller should restore the old event consumer, when done with a proxyConsumer to handle future developments. """ sec = self._statusEventConsumer self._statusEventConsumer = proxyConsumer self.statusEventLock.acquire() try: if self._statusEventConsumer is not None: self.event_supplier = EventSupplier(self._statusEventConsumer) else: if self.event_supplier is not None: self.event_supplier.remove() self.event_supplier = None finally: self.statusEventLock.release() return sec def notify_caller(self, msg): self.statusEventLock.acquire() try: if self.event_supplier is None: return False self.event_supplier.send_str(msg) finally: self.statusEventLock.release() return True def _supply_trees_as_events(self, tree_list, score_list, model=None): """Return value is True to indicate more trees can be handled. """ conv_tr_list = self._map_trees_to_clients_names(tree_list, score_list) for t in conv_tr_list: self.statusEventLock.acquire() try: if self.event_supplier is None: return False self.event_supplier.send_tree(t) finally: self.statusEventLock.release() return True def setToCallerTranslator(self, tct): try: self.translationObjLock.acquire() self._toCallerTranslator = tct finally: self.translationObjLock.release() def getToCallerTranslator(self): return self._toCallerTranslator toCallerTranslator = property(getToCallerTranslator, setToCallerTranslator) def _map_trees_to_clients_names(self, tree_list, score_list): try: #_LOG.debug("Translating trees") self.translationObjLock.acquire() fixed_tree_list = [] for tree, score in izip(tree_list, score_list): t = addScoreAndMapLeaves(tree, score, self.criterion , self._toCallerTranslator) fixed_tree_list.append(self.convertTreeFunc(t)) return fixed_tree_list finally: self.translationObjLock.release() #_LOG.debug("done translating trees") def _readScores(self, scoreFile): if self.criterion == OptimalityCriterion.PARSIMONY: return self.readPScores(scoreFile) if self.criterion == OptimalityCriterion.LIKELIHOOD: # readLScores return [[lnL, model], [lnL, model]...] # here we pull out just the score. raw_l_score_read = self.readLScores(scoreFile, self.model, longFmt = True, demandHeader = True) return [i[0] for i in raw_l_score_read] return self.readDScores(scoreFile) def score_cmp(self, first_score, second_score): """Uses the current optimality criterion to returns: -1 if first_score is better than second_score, 0 if the scores are identical, 1 if the second_score is better """ if self.criterion == OptimalityCriterion.LIKELIHOOD: # bigger is better in ML return cmp(second_score, first_score) # smaller is better in distance and MP return cmp(first_score, second_score) def is_running_ratchet(self): return len(self._ratchet_score_files) > 0 def _get_ratchet_results(self, dir): """Returns a (list of scores, list of trees) representing the best trees found during a ratchet search. Note: the trees may be duplicates (if they were found in multiple iterations of the ratchet search). """ best_iterations = [] best_score_list = [] for n, sf in enumerate(self._ratchet_score_files): score_file = os.path.join(dir, sf) score_list = self._readScores(score_file) if not best_score_list: c = 1 else: c = self.score_cmp(best_score_list[0], score_list[0]) if c == 0: best_iterations.append(n) best_score_list.extend(score_list) elif c > 0: # new-score is better best_iterations = [n] best_score_list = score_list assert(best_iterations) treesToReturn = [] for i in best_iterations: tf = self._ratchet_tree_files[i] treeFile = os.path.join(dir, tf) #_LOG.debug("Reading tree of length %s from %s" % (str(best_score_list[0]), treeFile)) trees = self.readTrees(treeFile) for n, tree in enumerate(trees): tree.m_name = "PAUP_Ratchet_iter_%d_tree_%d" % (i + 1, n + 1) treesToReturn.extend(trees) return best_score_list, treesToReturn def evaluateTree(self, proxyConsumer): prevSEC = self.setStatusEventConsumer(proxyConsumer) try: try: _LOG.debug('evaluateTree called') if not (self.mat and self.tree): raise RuntimeError, 'Failed to call setTree and setMatrix before evaluateTree' return self._paupAnalysis(self.tree, self.treeEvaluateDefault + ' ; ' + self.userCommands(), searchCmd = '')[0] except: logException(_LOG) return nilTree() finally: self.setStatusEventConsumer(prevSEC) def improveTree(self, proxyConsumer): _LOG.debug('improveTree called') treeList = self.improveTreeReturnAll(proxyConsumer) if len(treeList) < 1: return nilTree() return treeList[0] def inferTree(self, proxyConsumer): _LOG.debug('inferTree called') treeList = self.inferTreeReturnAll(proxyConsumer) if len(treeList) < 1: return nilTree() return treeList[0] def improveTreeReturnAll(self, proxyConsumer): prevSEC = self.setStatusEventConsumer(proxyConsumer) try: try: _LOG.debug('in improveTreeReturnAll') if not (self.mat and self.tree): raise RuntimeError, 'Failed to call setTree and setMatrix before improveTree' return self._paupHSearch(self.tree, self.treeImproveDefault + ' ; ' + self.userCommands()) except: logException(_LOG) return [] finally: self.setStatusEventConsumer(prevSEC) def inferTreeReturnAll(self, proxyConsumer): prevSEC = self.setStatusEventConsumer(proxyConsumer) try: try: _LOG.debug('inferTree called') if not self.mat: raise RuntimeError, 'Failed to call setMatrix before inferTree' return self._paupHSearch(None, self.treeInferDefault + ' ; ' + self.userCommands()) except: logException(_LOG) return nilTree() finally: self.setStatusEventConsumer(prevSEC) def bootstrapToGetSplits(self): extInvContext = self._createTempDataFile( matrix=self.mat, tree=None, supportsTaxaSubsets=self.futurePaup, dirPref='paupWrap') log_fn = os.path.splitext(extInvContext.filename)[0] + os.path.extsep + 'log' extraPaths = [log_fn, self.bootFreqFile] try: cmdList = [ self.getInitializationCmd(), self.getBootSearchDefaults(), 'log stop', 'log file=%s start replace' % self.bootFreqFile, self.getBootDefaults(), self.userCommands(), 'boot', 'log stop', 'log file = data.log append start', ] extInvContext.fileObj.write(self._embedInPaupBlock(cmdList)) extInvContext.fileObj.close() self._callProcess(extInvContext.tempDir, [self.paupPath, '-n', str(extInvContext.filename)] ) # commandline or extInvContext.filename limit in paup means that we have to chdir and use relative paths splitsFile = os.path.join(extInvContext.tempDir, self.bootFreqFile) try: self.splitFreq = {} self.splitFreq = readPAUPLogForSplitFreqs(splitsFile) except SplitsNotFoundError: pass finally: if not self.debugging: self.cleanupTemporaries(extInvContext.tempDir, [extInvContext.filename] + extraPaths) else: _LOG.debug('leaving the temporary directory %s (running in debug mode)' % extInvContext.tempDir) def enforceConstraint(self, constraint, demandSplits = True): '''Adds a split of tree to the constaints. if "demandSplits" is false, then a converse constraint is used. Only affects searching (not tree evaluation)''' self.constraints = [] def getConstraintsCmds(self): if (not self.constraints) or self.constraints[0] == ConstraintType.DONT_ENFORCE: return '' t = self.constraints[0] conCmd = 'constraints theConstraint (%s) = %s' % (ConstraintType.toPaupType[t], self.constraints[1].getNewick()) searchCmd = 'default hsearch enforce constraints=theConstraint converse=%s' % (ConstraintType.isConverse(t) and 'yes' or 'no') return '%s;%s' %(conCmd, searchCmd) def _getPathToPaupFromArgv(argv): PaupWrap.debugging = '-debug' in argv if len(argv) < 2: _LOG.error(' '.join(argv)) raise ValueError, 'Expecting the path to Paup as an argument' return argv[1] def createPaupWrap(registry, argv): return PaupWrap(registry, _getPathToPaupFromArgv([0] + argv)) # we add an empty element to act like the script name in a sys.argv list def defaultPaupWrap(): '''Returns a MBTwoExpWrap object with a getCipresRegistry, and path to mbTwoExp from the environment.''' return PaupWrap(getCipresRegistry(), getCipresProperty('paupPath')) if __name__=='__main__': argv = sys.argv paupPath = _getPathToPaupFromArgv(argv) def PaupWrapFactory(registry, pathToPaup = paupPath): return PaupWrap(registry, pathToPaup) try: interfaceDict = {} for k in ['TreeEvaluate', 'TreeImprove', 'TreeInfer']: interfaceDict[k] = PaupWrapFactory cipresServe(argv, interfaceDict) except: logException(_LOG)