#!/usr/bin/python
'''Prints the trees found in simple NEXUS files and the split of each internal
node.

Usage: pass one or more tree file names on the command line.  Every file must
contain a TAXLABELS or TRANSLATE command before its first TREE command.
'''
import sys
import re
import copy
from PIPRes.tree import *
from PIPRes.nexus.primitives import NexusTaxaManager
# Kept after the wildcard import (as in the original file) so that a `string`
# name exported by PIPRes.tree, if any, cannot shadow the stdlib module.
import string

# string.maketrans exists only on Python 2; str.maketrans is the Python 3
# spelling.  The `or` keeps str.maketrans from being evaluated on Python 2.
_maketrans = getattr(string, 'maketrans', None) or str.maketrans
_underToBlankTrans = _maketrans('_', ' ')

# Matches an innermost [...] comment (one containing no other brackets).
_commentPat = re.compile(r'\[[^\[\]]*\]')


def toNexus(s):
    '''Converts a raw label token to the name it denotes.

    Unquoted tokens have underscores replaced by blanks.  A token wrapped in
    exactly one pair of single quotes is returned without the quotes.  Any
    other use of a single quote terminates the program via sys.exit.
    '''
    t = s.strip()
    if "'" not in t:
        return t.translate(_underToBlankTrans)
    if len(t) < 2 or t[0] != "'" or t[-1] != "'" or "'" in t[1:-1]:
        sys.exit('Parser does not deal with quotes')
    return t[1:-1]


def stripComments(s):
    '''Returns `s` with all balanced [...] comments removed.

    Each pass removes the innermost comments; passes are repeated until the
    text stops changing, which removes nested comments from the inside out.
    '''
    prev = ''
    while s != prev:
        prev = s
        s = _commentPat.sub('', prev)
    return s


def _parseTranslate(words):
    '''Returns the list of taxon labels declared by a TRANSLATE command.

    `words` is the whitespace-split command (words[0] is the command name).
    The expected form is "number label" pairs separated by commas; the comma
    may be attached to the label or stand alone as its own word.  The list is
    as long as the highest number that was given a non-empty label, and any
    number below that without a label is named "taxonN".  Malformed input
    terminates the program via sys.exit.
    '''
    labels = {}
    ind = None
    label = None
    for tok in words[1:]:
        if ind is None:
            try:
                ind = int(tok) - 1
            except ValueError:
                sys.exit('Expecting a taxon number in the TRANSLATE command, but found "%s".' % tok)
            if ind < 0:
                sys.exit('Taxon numbers in the TRANSLATE command must be positive, but found "%s".' % tok)
        elif label is None:
            if tok.endswith(','):
                labels[ind] = tok[:-1]
                ind = None
            else:
                label = tok
        else:
            if tok != ',':
                sys.exit('Expecting a comma after the label "%s" in the TRANSLATE command, but found "%s".' % (label, tok))
            labels[ind] = label
            label = None
            ind = None
    if ind is not None:
        # The last pair is not followed by a comma.
        if label is None:
            sys.exit('Expecting a label after the taxon number %d in the TRANSLATE command.' % (ind + 1))
        labels[ind] = label
    named = [i for i in labels if labels[i]]
    if not named:
        sys.exit('Expecting numbered labels after the TRANSLATE command.')
    # Size the table by the highest labelled number rather than by the number
    # of words in the command, so the result does not depend on how the
    # commas happen to be spaced.
    return [labels.get(i) or 'taxon%d' % (i + 1) for i in range(max(named) + 1)]


def _parseTreeCommand(words):
    '''Returns (name, newick) for a whitespace-split TREE command.

    The expected form is: TREE name = [comment] newick, where the optional
    comment is a single whitespace-delimited word.  The remaining words are
    concatenated to form the newick string.  Malformed input terminates the
    program via sys.exit.
    '''
    if len(words) < 4:
        sys.exit('Expecting tree name and newick representation after TRee command.')
    name = words[1]
    if words[2] != '=':
        sys.exit('Expecting "=" after the tree name "%s", but found "%s".' % (name, words[2]))
    start = 3
    if words[start].startswith('['):
        if not words[start].endswith(']'):
            sys.exit('Expecting comment before tree (if a comment is present) to be whitespace delimited.')
        start += 1
    newick = ''.join(words[start:])
    if not newick:
        sys.exit('Expecting tree name and newick representation after TRee command.')
    return name, newick


def parseDivaInFileObj(f):
    '''Uses very primitive nexus parsing does not deal with comments or quotes
    correctly.

    Reads all of the file object `f`, drops a leading #NEXUS token and all
    balanced [...] comments, and splits the rest into commands on ";".  Only
    the TAXLABELS, TRANSLATE and TREE commands are interpreted; every other
    command is ignored.  For each tree, prints its name and newick string and
    then one line per internal node (visited in postorder) with the node
    number and its split representation.

    Terminates the program via sys.exit on input it cannot handle.
    '''
    wholeFile = f.read()
    if wholeFile[:len('#NEXUS')].upper() == '#NEXUS':
        wholeFile = wholeFile[len('#NEXUS'):]
    wholeFile = stripComments(wholeFile)
    taxLabels = []
    taxManager = None
    trees = []
    for cmdline in wholeFile.split(';'):
        words = cmdline.split()
        if not words:
            continue
        cmd = words[0].upper()
        if cmd == 'TAXLABELS':
            taxLabels = [toNexus(i) for i in words[1:]]
            if not taxLabels:
                sys.exit('Expecting space separated labels after the TAXLABELS command.')
            taxManager = NexusTaxaManager(taxLabels)
        elif cmd == 'TRANSLATE':
            taxLabels = _parseTranslate(words)
            taxManager = NexusTaxaManager(taxLabels)
        elif cmd == 'TREE':
            if taxManager is None:
                sys.exit('Expecting TAXLABELS or TRANSLATE command before the first tree.')
            name, newick = _parseTreeCommand(words)
            trees.append(TreeWithSplits(newick, rooted=True, name=name, taxaManager=taxManager))
    if taxManager is None:
        sys.exit('No taxalabels or trees found')
    if not trees:
        sys.exit('No Trees found')
    nTax = len(taxLabels)
    for n, tree in enumerate(trees):
        # A single parenthesised string prints identically as a Python 2
        # print statement and as a Python 3 print() call.
        print('Tree %d label=%s: %s' % (n + 1, tree.name, tree.newick))
        for nd in iterPostorder(tree.root, Node.isInternal):
            # Internal nodes are numbered after the nTax taxa.
            print('Node %s %s' % (nTax + nd.internalNodeIndex + 1, toSplitRep(nd.split, nTax)))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('%s Error:\n Expecting (at least one) name of an input tree file to be passed in as a command line argument' % sys.argv[0])
    for filename in sys.argv[1:]:
        inp = open(filename)
        try:
            parseDivaInFileObj(inp)
        finally:
            # Close the file even when parsing exits early via sys.exit.
            inp.close()