'''Creates a tree from a file with (compatible) splits.''' import sys import random from PIPRes.tree import TreeWithSplits from PIPRes.node import iterInternals from PIPRes.splits import * from PIPRes.nexus.public_blocks import * from PIPRes.util.io import cipresGetLogger _LOG = cipresGetLogger('example.treeFromSplits') _version = '0.01' _options = [ ['-v', 'prints version'], ['-h', 'prints help'], ['-n', 'output in a NEXUS-ish format'], ['', 'splits file'], ['', 'path to a NEXUS file with the taxa to use in a data block.'], ] def getVersion(): return 'treeFromSplits.py version %s (MTH)\n' % _version def printVersion(outS): outS.write(getVersion()) def printHelp(outS): printVersion(outS) outS.write('Reads :\n ') outS.write('\n '.join(['%10s %s' % (i[0],i[1]) for i in _options])) outS.write('\n\n') def purePythonGetDataBlock(inFilename): matrices = [] try: NexusParsing.pushCurrentFile(inFilename) inF = open(inFilename, 'rU') NexusParsing.statusMessage('Reading %s ...' % inFilename) for b in NexusBlockStream(inF, ALL_PUBLIC_BLOCKS, True): if isinstance(b, NexusCharactersBlock): matrices.append(b) break NexusParsing.popCurrentFile() inF.close() except NexusError, x: import os print >>sys.stderr, x NexusParsing.statusStream = None return matrices def parseSplitFileObject(f): """Expects file-like object with lines of .**.**. name .**..*. name Returns dict of {split:rest-of-line} and nLeaves in splits """ srn = re.compile(r'^([.*]+)\s+(.*)$') d = {} nLeaves = None for n, l in enumerate(f): if l: m = srn.match(l) if not m: sys.exit('line %d does not match expected format of split rep and then name'% n) g = m.groups() s = g[0] if nLeaves is None: nLeaves = len(s) elif len(s) != nLeaves: sys.exit('Split rep in line %d differs in length from previous lines' % n) s= splitRepToSplit(s) n = g[1] if s in d: sys.exit('Split in line %d has already been read' % n) d[s] = n.strip() return d, nLeaves def parseSplitsFile(f): """Expects path to file with lines of .**.**. name .**..*. name """ try: s = open(f, 'rU') except: sys.exit('Could not open %s' % f) return parseSplitFileObject(s) if __name__ == '__main__': nexusFilename = None splitsFilename = None version, help, nexus, nLeaves, nTrees, meanBrLen, seed = False, False, False, 4, 1, 1.0, None for arg in sys.argv[1:]: if arg[0] == '-': if arg == '-h': help = True elif arg == '-v': version = True elif arg == '-n': nexus = True else: printHelp(sys.stderr) sys.exit('Unrecognized option: %s' % arg) else: if nexusFilename is not None: sys.exit('Only one NEXUS file can be specified (2 found %s and %s)' % (filename, arg)) if splitsFilename is None: splitsFilename = arg else: nexusFilename = arg if help: printHelp(sys.stdout) sys.exit(0) if version: printVersion(sys.stdout) sys.exit(0) if splitsFilename is None: sys.exit('name of a splits files is required as the first argument (that does not begin with -)') splitReps, nLeaves = parseSplitsFile(splitsFilename) if nexusFilename is None: tNames = map(lambda x: 't%d' % (x + 1), range(nLeaves)) else: mats = purePythonGetDataBlock(nexusFilename) if not mats: sys.exit('No Data matrices found') mat = mats[0] # what to do if we see multiple? tNames = mat.getTaxLabels() if not (tNames): sys.exit('Error. Could not get a taxon list from the data matrix') nLeavesFromNexus = len(tNames) if nLeaves != nLeavesFromNexus: sys.exit('Splits file has %d taxa, but NEXUS data matrix had %d taxa' %(nLeaves, nLeavesFromNexus)) t = TreeWithSplits(splits=list(splitReps.iterkeys()), taxLabels=tNames) r = t.root mask = r.split for k, v in splitReps.iteritems(): found = False for n in iterInternals(r): if splitsEqual(k, n.split, mask): n._internalName = v found = True break assert(found) print """#NEXUS begin trees; tree test = [&U] %s; end; """ % t.getNewick(internalTaxonLabels=True)