#!/usr/bin/python
# Copyright (c) 2005 by Mark T. Holder, Florida State University. (see end of file)
"""Read trees and discrete character data written in Pagel's Discrete format.

Each non-blank, non-comment ('#') line is a comma-separated record:

    name, parent name, branch length[, trait, trait, ...]

Nodes that carry trait columns are registered as taxa (in order of
appearance); a parent name that is never defined as a node becomes the root.
"""
import sys
from PIPRes.tree import Tree
from PIPRes.splits import makeSimpleMask
from PIPRes.node import NodeWithSplit
from PIPRes.cipres_types import CipresDiscreteMatrix
from PIPRes.nexus.primitives import LabelsOrNumbersTaxaManager


class PagelFormatError(ValueError):
    """Raised when the input does not conform to the Pagel Discrete format.

    Attributes:
        ln      -- 1-based line number of the offending line (or None)
        line    -- stripped text of the offending line (or None)
        message -- description of the problem
    """

    def __init__(self, message, lineNumber=None, line=None):
        # Initialise the base class so that e.args is populated
        ValueError.__init__(self, message)
        self.ln = lineNumber
        self.line = line
        self.message = message

    def __str__(self):
        numTxt = ''
        if self.ln is not None:
            numTxt = ' at line %d' % self.ln
        lineTxt = ''
        if self.line:
            lineTxt = ' "%s"' % self.line
        return 'Error%s%s: %s' % (numTxt, lineTxt, self.message)


def _readPagelNodes(file):
    """Parse every record of `file` into nodes and a text character matrix.

    Returns (ndDict, taxNames, textMatrix, symbols) where
        ndDict     maps node name => [node, parent name, 0-based line index]
        taxNames   lists the names of nodes that had trait columns
        textMatrix holds one list of trait characters per entry of taxNames
        symbols    is '01' extended by any other trait characters encountered

    Raises PagelFormatError for short records, duplicate node names,
    non-numeric branch lengths and multi-character traits.
    """
    textMatrix = []
    symbols = '01'
    ndDict = {}
    taxNames = []
    for number, line in enumerate(file):
        lineNumber = 1 + number  # error messages use 1-based line numbers
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            continue
        processed = [field.strip() for field in stripped.split(',')]
        if len(processed) < 3:
            raise PagelFormatError('Expecting at least ,, for every node.', lineNumber, stripped)
        name, parName, edgeLenStr = processed[:3]
        if name in ndDict:
            # ndDict stores the 0-based index; convert it so that it agrees
            # with the line numbers reported everywhere else.
            prevLineNumber = 1 + ndDict[name][2]
            raise PagelFormatError('Node %s was already defined on line %d.' % (name, prevLineNumber), lineNumber, stripped)
        try:
            edgeLength = float(edgeLenStr)
        except ValueError:
            raise PagelFormatError('Expecting third element to be a numeric branch length (found %s)' % edgeLenStr, lineNumber, stripped)
        currNd = NodeWithSplit()
        currNd.name = name
        currNd.edgeLength = edgeLength
        ndDict[name] = [currNd, parName, number]
        traits = processed[3:]
        if traits:
            # A node with trait data is a taxon; its index is its row in the matrix.
            currNd.setIndex(len(taxNames))
            for datum in traits:
                if len(datum) > 1:
                    # MTH not sure if this a requirement of the format
                    raise PagelFormatError('Expecting trait definition to be a single character (found "%s")' % datum, lineNumber, stripped)
                if datum not in symbols:
                    symbols = symbols + datum
            taxNames.append(name)
            textMatrix.append(traits)
    return ndDict, taxNames, textMatrix, symbols


def _connectPagelNodes(ndDict):
    """Attach every node in `ndDict` to its parent and return the root node.

    The single parent name that has no record of its own is created as the
    root (and added to `ndDict`).  Returns None if every node's parent was
    itself defined, i.e. no root could be identified.

    Raises PagelFormatError if more than one undefined parent name is found.
    """
    rootNd = None
    # Iterate over a snapshot of the names: the root is inserted into ndDict
    # inside the loop and must not be visited itself.
    for name in list(ndDict):
        currNd, parName, number = ndDict[name]
        parNdList = ndDict.get(parName)
        if parNdList is None:
            if rootNd is not None:
                raise PagelFormatError('Disconnected Tree. Nodes %s and %s both appear to be root nodes' % (rootNd.name, parName), 1 + number)
            rootNd = NodeWithSplit()
            rootNd.name = parName
            parNdList = [rootNd, '', -1]
            ndDict[parName] = parNdList
        parNdList[0]._addChild(currNd)
    return rootNd


def fromPagelDiscreteFormat(file):
    """Build a tree and a discrete matrix from Pagel Discrete formatted lines.

    `file` is any iterable of text lines (e.g. an open file object).

    Returns the tuple (Tree, CipresDiscreteMatrix).  The tree is rooted, has
    edge lengths and uses a taxa manager built from the taxon names in order
    of appearance; the matrix has one row per taxon in that same order.

    Raises PagelFormatError if the input is malformed, empty, contains a
    cycle, or does not describe a single connected tree.
    """
    ndDict, taxNames, textMatrix, symbols = _readPagelNodes(file)
    if not ndDict:
        # Without this check empty input would be reported as a cycle.
        raise PagelFormatError('No nodes were found in the input.')
    rootNd = _connectPagelNodes(ndDict)
    if rootNd is None:
        raise PagelFormatError('Cycle detected. No root node was found')
    # presumably the mask with a bit set for every taxon -- verify in PIPRes.splits
    expectedSplit = makeSimpleMask(len(taxNames))
    rootNd.refreshSplits()
    if rootNd.split != expectedSplit:
        raise PagelFormatError('Disconnected Tree. Some taxa are not connected to the root node.')
    tm = LabelsOrNumbersTaxaManager(taxNames)
    t = Tree(rooted=True, taxaManager=tm)
    t.root = rootNd
    t.hasEdgeLengths = True
    m = CipresDiscreteMatrix(textMatrix, symbols=symbols, nStates=len(symbols))
    return t, m


def fromPagelDiscreteFile(filePath):
    """Open `filePath` and parse it with fromPagelDiscreteFormat.

    Returns the same (Tree, CipresDiscreteMatrix) tuple.  The file is closed
    before returning, whether or not parsing succeeds.
    """
    f = open(filePath, 'rU')
    try:
        return fromPagelDiscreteFormat(f)
    finally:
        f.close()


if __name__ == '__main__':
    # Convert each file named on the command line to NEXUS on standard output.
    from PIPRes.basic import writeNexusDataMatrix, writeTreesBlock
    import cStringIO
    for f in sys.argv[1:]:
        try:
            t, matrix = fromPagelDiscreteFile(f)
            # Buffer the output so nothing is printed if writing fails part way.
            s = cStringIO.StringIO()
            taxLabels = t.taxaManager.getTaxLabels()
            s.write('#NEXUS\n')
            writeNexusDataMatrix(s, matrix, taxLabels)
            writeTreesBlock(s, [t], taxLabels)
            print(s.getvalue())
        except IOError:
            print('The file %s could not be opened.' % f)
        except PagelFormatError as e:
            print('Error reading %s :' % f)
            print('  %s' % e)

# This file is part of the PIPRes library
#
# The PIPRes library is free software; you can redistribute it
# and/or modify it under the terms of the GNU Lesser General
# Public License as published by the Free Software Foundation;
# either version 2.1 of the License, or (at your option) any later
# version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA