#!/usr/bin/python
# Copyright (c) 2005 by Mark T. Holder, Florida State University. (see end of file)
'''Routines for reading PAUP (and MrBayes) output files and formatting input
for PAUP.'''
import contextlib
import re
import sys

from PIPRes.splits import splitRepToSplit

# Python 2 keeps returning ``long`` scores exactly as before; on interpreters
# without ``long`` the plain ``int`` type is already arbitrary precision.
try:
    _toInteger = long
except NameError:
    _toInteger = int

# Universal-newline mode must be requested explicitly on Python 2; on Python 3
# it is the default and the 'U' flag is no longer accepted by recent versions.
if sys.version_info[0] >= 3:
    _READ_MODE = 'r'
else:
    _READ_MODE = 'rU'


class SplitsNotFoundError(ValueError):
    '''Raised when no split-frequency table can be located in the input.'''
    pass


@contextlib.contextmanager
def _linesFrom(f):
    '''Yields an iterator over the lines of `f`.

    `f` is either a file path (a str), which is opened here and closed again
    when the `with` block exits, or any iterable of lines, which is left
    untouched (the caller keeps ownership of it).'''
    if isinstance(f, str):
        stream = open(f, _READ_MODE)
        try:
            yield iter(stream)
        finally:
            stream.close()
    else:
        yield iter(f)


def readPaupPScores(f, demandHeader=True):
    '''Reads a PAUP parsimony score file and returns the list of tree lengths.

    `f` is a file path or an iterable of lines.  Each data row is expected to
    hold a tree number followed by the tree's score; only the score is kept.
    If `demandHeader` is true the first line must be a "Tree Length" header
    (case-insensitive); otherwise a first line that is not the header is
    parsed as a data row.

    Raises ValueError if the required header is missing or a row does not
    contain a second, integer column.'''
    with _linesFrom(f) as fIter:
        firstRaw = next(fIter, None)
        if firstRaw is None:
            # Empty input: there is no header and there are no scores.
            if demandHeader:
                raise ValueError('File does not start with "Tree Length"')
            return []
        firstLine = firstRaw.upper().split()
        hasHeader = firstLine[:2] == ['TREE', 'LENGTH']
        if demandHeader and not hasHeader:
            raise ValueError('File does not start with "Tree Length"')
        scores = []
        try:
            if not hasHeader:
                scores.append(_toInteger(firstLine[1]))
            scores.extend([_toInteger(i.split()[1]) for i in fIter])
        except (AttributeError, IndexError):
            # Both exception types must be listed in a tuple; the old
            # "except A, B" form only caught the first one.
            raise ValueError('File does not consist of rows of Tree# TreeScore pairs.')
        return scores


def readPaupLScores(f, model, demandHeader=True, longFmt=True):
    '''Currently returns a list of lists of floats. The first element is the
    -lnL and the rest are model params.
    SOON this will change to a list of pairs of [lnL, modelSpec] where the
    modelSpec is an object with values for the parameters inferred by PAUP

    NOTE: the function is disabled for now and always raises
    NotImplementedError; the parsing code below the guard is an unreachable
    draft kept for when it is re-enabled.'''
    # always raising an exception now. Need to check sign of lnL (I am adding
    # score_cmp function now for parsimony and don't want to get distracted
    # testing the reading of lnL. mth june-27,2007
    if True:  # model is not None:
        raise NotImplementedError('Sorry readPaupLScores with a model is just a place holder.')
    with _linesFrom(f) as fIter:
        firstLine = next(fIter).upper().split()
        scores = []
        if firstLine[0] != 'TREE' or firstLine[1] != '-LNL':
            if demandHeader:
                raise ValueError('File does not start with "Tree -lnL"')
            scores.append([float(i) for i in firstLine[1:]])
        try:
            scores.extend([[float(j) for j in i.split()[1:]] for i in fIter])
        except (AttributeError, IndexError):
            raise ValueError('File does not consist of rows of Tree# TreeScores .')
        # we are negating the -lnL to get the lnL here; the remaining columns
        # (model parameters, if any) are passed through unchanged.
        return [[-i[0]] + i[1:] for i in scores]


def readPaupDScores(f, demandHeader=True):
    '''Place holder for reading PAUP distance scores; always raises
    NotImplementedError.'''
    raise NotImplementedError('Sorry readPaupDScores is just a place holder.')


# A run of '.' and '*' characters optionally followed by digits and trailing
# whitespace.  Raw string so that \d and \s reach the regex engine untouched.
_buggyRawSplitRep = re.compile(r'([.*]+)(\d*\s*)$')


def rawMBSplitRepToSplit(s):
    '''MB seems to concatenate the split rep and the number of times it is
    seen in one line, hence we pull off the split rep here.

    Returns the result of splitRepToSplit on the leading '.'/'*' portion of
    `s`.  Raises ValueError if `s` does not match that pattern.'''
    m = _buggyRawSplitRep.match(s)
    if not m:
        raise ValueError('split representation %s does not conform to expected pattern of . and * characters' % s)
    return splitRepToSplit(m.group(1))


def readPAUPLogForSplitFreqs(f, demandHeader=True):
    '''Reads the bipartition frequency table from a PAUP log and returns a
    dictionary mapping splits to their frequency (as a proportion, 0 to 1).

    `f` is a file path or an iterable of lines.  If `demandHeader` is true,
    lines are skipped until a "Bipartitions ...:" line followed (not
    necessarily immediately) by a row of dashes has been seen; otherwise the
    table is expected to start on the first line.  The table ends at the
    first line that does not begin with '.' or '*'.

    Raises SplitsNotFoundError if the demanded header is absent, and
    ValueError if a frequency column does not end in '%'.'''
    with _linesFrom(f) as fIter:
        if demandHeader:
            dividerPattern = re.compile(r'^\-+\s*$')
            introPattern = re.compile(r'^Bipartitions[ a-zA-Z()]+:\s*$')
            # First look for the intro line, then for the divider beneath it.
            patternToBreakOn = introPattern
            for line in fIter:
                if patternToBreakOn.match(line):
                    if patternToBreakOn is dividerPattern:
                        patternToBreakOn = None
                        break
                    patternToBreakOn = dividerPattern
            if patternToBreakOn is not None:
                raise SplitsNotFoundError('Expecting split frequency table to be preceded by Bipartion... and a row of -----\'s')
        splitPattern = re.compile(r'[.*]+')
        splitFreq = {}
        for line in fIter:
            if not splitPattern.match(line):
                break
            lineBroken = line.split()
            split = splitRepToSplit(lineBroken[0])
            # Rows are either "split pct%" or "split count pct%".
            if len(lineBroken) == 2:
                n = lineBroken[1]
            else:
                n = lineBroken[2]
            if n[-1] != '%':
                raise ValueError('Expecting the last 3rd column in the splits table to end in %!')
            splitFreq[split] = float(n[:-1])/100.0
        return splitFreq


def readMBSplitFreqs(f, demandHeader=True, missingWhitespaceBug=True):
    '''Reads a MRBAYES .parts file and returns a dictionary mapping splits to
    their posterior probability.

    Currently a hack - not really checking for any recognizable content in
    header.  Assuming that the table always starts with a 1 (and no line in
    the header starts with 1).
    Don't know how MB deals with long split reps.

    missingWhitespaceBug (in 3.1.1 there is a missing whitespace bug that
    causes the second and third columns of output to have no whitespace
    between them).  Untested.

    `f` is a file path or an iterable of lines.  Blank lines are ignored.
    Raises ValueError if `demandHeader` is true and the table row starting
    with 1 is not preceded by any non-blank line, and SplitsNotFoundError if
    no row starting with 1 is found at all.'''
    if missingWhitespaceBug:
        # Split rep and count are fused into one column, so the probability
        # sits one column earlier and the count must be stripped off.
        ind = 2
        splitDecoder = rawMBSplitRepToSplit
    else:
        ind = 3
        splitDecoder = splitRepToSplit
    with _linesFrom(f) as fIter:
        headerFound = False
        for line in fIter:
            lineSplit = line.split()
            if not lineSplit:
                continue
            if lineSplit[0] == '1':
                if demandHeader and not headerFound:
                    raise ValueError('MRBAYES split frequency header')
                break
            headerFound = True
        else:
            # Ran out of input without ever seeing the first table row.
            raise SplitsNotFoundError('No MRBAYES split frequency table (row starting with 1) found')
        splitFreq = {splitDecoder(lineSplit[1]): float(lineSplit[ind])}
        for line in fIter:
            lineSplit = line.split()
            if not lineSplit:
                # A blank line carries no split; skip it rather than crash.
                continue
            splitFreq[splitDecoder(lineSplit[1])] = float(lineSplit[ind])
        return splitFreq

# This file is part of the PIPRes library
#
# The PIPRes library is free software; you can redistribute it
# and/or modify it under the terms of the GNU Lesser General
# Public License as published by the Free Software Foundation;
# either version 2.1 of the License, or (at your option) any later
# version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA