#!/usr/bin/python # Copyright (c) 2005 by Mark T. Holder, Florida State University. (see end of file) ''' Node class ''' from splits import * import cStringIO import random from PIPRes.nexus.primitives import NexusToken from PIPRes.util.io import cipresGetLogger _LOG = cipresGetLogger('pipres.node') class BadTreeStructureError(ValueError): '''invalid trees already created (not raised in reading from a string).''' pass def iterTips(nd, func = None): '''Generator for tips that are descendants of 'nd' (filtered by the predicate 'func()' if supplied).''' if nd is None: return for n in iterPreorder(nd, Node.isLeaf): if func is None or func(n): yield n def iterInternals(nd, func = None): '''Generator for internals (in preordering) that are descendants of 'nd' (filtered by the predicate 'func()' if supplied).''' if nd is None: return for n in iterPreorder(nd, Node.isInternal): if func is None or func(n): yield n def iterChildren(nd, func = None): '''Generator for the immediate children of 'nd' (filtered by the predicate 'func()' if supplied).''' if nd is None: return ch = nd.lChild while ch is not None: if func is None or func(ch): yield ch ch = ch.rSib class Edge(object): '''An Edge on a phylogeny. (some operations require that end.par = start''' def __init__(self, endNd, startNd = None): self.end = endNd self.start = startNd is None and endNd.par or startNd def _bisect(self, nd): '''adds a new node as the ancestor of end, and attachs nd to this node. Note: Does NOT refresh split representation of any of the nodes involved! After calling this self.end.par will NOT be self.start ''' newnd = self.end._insertParent() newnd._addChild(nd) return newnd def iterEdges(nd, func = None): '''Generator returning all edges above, nd filtered by func.''' first = True for n in iterPreorder(nd): if not first and (func is None or func(n)): yield Edge(n) first = False def iterPostorder(nd, func = None): '''Generator for the descendants of 'nd' in post-order traversal ordering (filtered by the predicate 'func()' if supplied).''' if nd is None: return if nd.lChild is None: if func is None or func(nd): yield nd return c = nd._getListTraversal(Node.getLeftChild) toRetStack, s = c[:-1], c[-1] while True: if func is None or func(s): yield s if s.rSib is None: s = toRetStack.pop(-1) if len(toRetStack) == 0: if func is None or func(s): yield s return else: c = s.rSib._getListTraversal(Node.getLeftChild) toRetStack.extend(c[:-1]) s = c[-1] def iterPreorder(nd, func = None): '''Generator for the descendants of 'nd' in pre-order traversal ordering (filtered by the predicate 'func()' if supplied).''' if nd is None: return endNd = nd._getExitPreorder() while True: if func is None or func(nd): yield nd nd = nd.getNextPreorder() if nd == endNd: return class TreeDisplay: noEdgeLengths = 0 # keep this 0, so we can test for either withEdgeLength or demandEdgeLengths by implicit bool withEdgeLengthsIfPresent = 1 demandEdgeLengths = 2 class Node(object): '''Abstract Node class. Neither index nor split data memebers are defined (see NodeWithSplit and SimpleNode below).''' def __init__(self): self.par = None self.lChild = None self.rSib = None self.edgeLength = None #accessors def getDegree(self): '''Returns the number of connected edges.''' return self.numChildren + (self.par and 1 or 0) def getName(self, taxMgr = None): '''Returns a string name for the node using the taxMgr if one is supplied. Default name is a string form of (self.index + 1).''' index = self.index if taxMgr is None: #_LOG.debug('using number as label') return str(index + 1) if isinstance(taxMgr, list): return taxMgr[index] return taxMgr.getTaxLabel(index) def getNumChildren(self): '''Returns the number of immediate descendants.''' lc = self.lChild if lc is None: return 0 return 1 + lc.getNumRSibs() def getNumRSibs(self): '''Returns the number of immediate descendants.''' s = self.rSib n = 0 while not (s is None): n += 1 s = s.rSib return n def hasOneChild(self): return self.lChild and not self.lChild.rSib def isInternal(self): return self.lChild is not None def isLeaf(self): return self.lChild is None def isRoot(self): return self.par is None def getChildren(self): return [i for i in iterChildren(self)] def writeNewick(self, out, taxMgr = None, edgeLenCode = TreeDisplay.withEdgeLengthsIfPresent, internalNodeNames = False): if self.lChild: ch = self.lChild out.write('(') while True: ch.writeNewick(out, taxMgr, edgeLenCode, internalNodeNames=internalNodeNames) ch = ch.rSib if ch is None: break else: out.write(',') out.write(')') if internalNodeNames and self.__dict__.get('_internalName'): out.write(NexusToken.escapeString(self._internalName)) else: out.write(NexusToken.escapeString(self.getName(taxMgr))) if edgeLenCode and ((self.edgeLength is not None) or edgeLenCode == TreeDisplay.demandEdgeLengths): out.write(':%s' % (self.edgeLength or '0')) def writeDOTSubTree(self, out, taxMgr = None, edgeSymbol = '--'): for n in iterPostorder(self): if n == self: break if n.lChild: out.write('\t"%s" %s "%s";\n' % (hex(n.par._uniqueNum) , edgeSymbol, hex(n._uniqueNum))) else: out.write('\t"%s" %s "%s";\n' % (hex(n.par._uniqueNum) , edgeSymbol, n.getName(taxMgr))) for n in iterPostorder(self): if n.lChild: out.write('\t"%s" [shape=point label="" style=filled height = .1]\n' %hex(n._uniqueNum)) if n == self: break def __str__(self): c = cStringIO.StringIO() self.writeNewick(c, None, False, False) return c.getvalue() def _insertParent(self): '''Creates and returns a new node with self as the left child. self.par becomes grandparent. New node has no edgelength!''' nd = self.__class__() lSib = self.lSib nd.par, self.par = self.par, nd nd.lChild = self nd.rSib, self.rSib = self.rSib, None if self.__dict__.has_key('split'): nd.split = self.split if lSib is None: nd.par.lChild = nd else: lSib.rSib = nd return nd def _enforceMaxNChildren(self, maxN, secondaryCallMax, newNodeEdgeLen = None): '''Replaces rightmost nodes of self with a new node such that the max number of children is maxN. The new node will have the rightmost nodes as its children, edgeLength of "newNodeEdgeLen", and will call _enforceMaxNChildren() on itself (but not recurse down the subtree calling the method). "secondaryCallMax" is a hack to allow the root to have degree 1+other children recursive calls to _enforceMaxNChildren use "secondaryCallMax" for "maxN" ''' c = self.children if len(c) > maxN: assert(maxN > 1) lastStationaryChild = c[maxN - 2] toMove = c[maxN - 1:] nd = self.__class__() nd.par = self nd.edgeLength = newNodeEdgeLen lastStationaryChild.rSib = nd nd._addChildren(toMove) nd._refreshOwnSplit() nd._enforceMaxNChildren(secondaryCallMax, secondaryCallMax, newNodeEdgeLen) def standardizeChildOrder(self): '''Reorders children so that the subtree with the lowest number descendant is the leftmost.''' children = self.children if len(children) < 2: return #using extracted item sort extractChildList = [(c.getLowestDescendantTipIndex(), c) for c in children] extractChildList.sort() self.lChild = extractChildList[0][1] nd = self.lChild for ndTuple in extractChildList[1:]: nd.rSib = ndTuple[1] nd = nd.rSib nd.rSib = None def randomRefine(self, nChildren = 2, rng = None): """Randomly refines the tree such that it has the specified number of children. Setting new edges to length 0.0 returns a list of new Nodes created.""" if rng is None: rng = random.Random() children = self.children if nChildren >= len(children): return [] if nChildren < 2: ValueError, 'nChildren must be greater than 1' # randomly partition the current children into nChildren sets which will become members of the # self node's children newChildrenPartition = rng.sample(children, nChildren) newChildrenLists = [[i] for i in newChildrenPartition] maxCat = nChildren - 1 for child in children: if child not in newChildrenPartition: newChildrenLists[rng.randint(0, maxCat)].append(child) # now we add the listed children in adding intervening nodes as necessary self.lChild = None newlyCreatedNodes = [] for newGrandKids in newChildrenLists: if len(newGrandKids) > 1: rng.shuffle(newGrandKids) self._addChild(newGrandKids[0]) if len(newGrandKids) > 1: attachNd = newGrandKids[0]._insertParent() newlyCreatedNodes.append(attachNd) attachNd.edgeLength = 0.0 attachNd._addChildren(newGrandKids[1:]) attachNd._refreshOwnSplit() return newlyCreatedNodes def addToEdgeLength(self, other): ''' adds the edge length of other to self (made difficult by edgeLength = None)''' if other is not None: if self.edgeLength is None: self.edgeLength = other else: self.edgeLength += other or 0 return self.edgeLength def collapseEdge(self): '''Inserts all children of self into self.par in place of self. Edge length (and other node info) is lost. self node will be detached from the tree.''' p = self.par if not p: return if not self.lChild: raise ValueError, 'Node.collapseEdge called with a terminal.' if self == p.lChild: p.lChild = self.lChild else: self.lSib.rSib = self.lChild self.rChild.rSib = self.rSib for c in iterChildren(self): c.par = p def collapseClade(self): for c in iterChildren(self): if c.isInternal(): c.collapseClade() self.collapseEdge() def collapseDescendants(self): for c in iterChildren(self): if c.isInternal(): c.collapseClade() def pruneSelf(self, collapseDegTwo = True): '''Prunes the subtree rooted at self. Does NOT refresh splits. Will cause deletion of par if par is degree < 3 and is not the root (if collapseDegTwo = True).''' if not self.par: return par = self.par lSib = self.getLeftSib() if not lSib: if not self.rSib: par.lChild = None par.pruneSelf(collapseDegTwo) else: par.lChild = self.rSib if collapseDegTwo and par.degree < 3: par._removeSelfInternal() else: lSib.rSib = self.rSib if collapseDegTwo and not self.rSib and par.degree < 3: par._removeSelfInternal() self.rSib = None self.par = None #traversal accessors def getRightmostSib(self): if self.rSib is None: return None n = self.rSib while n.rSib is not None: n = n.rSib return n def getRightmostChild(self): n = self.lChild if n is None: return None if n.rSib is None: return n return n.getRightmostSib() def getLeftSib(self): n = self.par if n is None: return None n = n.lChild if n is None: raise BadTreeStructureError, "Node's parent has lChild = None" if n == self: return None while n.rSib != self: n = n.rSib if n is None: raise BadTreeStructureError, "Node not in parent's child list" return n def getLeftChild(self): return self.lChild def getNextPreorder(self): return self.lChild or self._getExitPreorder() def _createChild(self, edgeLength = None): '''Does not refresh ancestors split representations.''' n = self.__class__() n.par = self n.edgeLength = edgeLength if self.lChild is None: self.lChild = n else: self.rChild.rSib = n return n def _createChildren(self, n, edgeLength=None): if n == 0: return rightMost = self._createChild(edgeLength) for i in range(n-1): newNode = self.__class__() newNode.par = self newNode.edgeLength = edgeLength rightMost.rSib = newNode rightMost = newNode return rightMost def createSib(self, edgeLength = None): '''Does not refresh ancestors split representations.''' if self.rSib is not None: return self.getRightmostSib().createSib() n = self.__class__() self.rSib = n n.par = self.par n.edgeLength = edgeLength return n def isSibling(self, other): n = self.rSib while n: if n == other: return True n = n.rSib n = other.rSib while n: if n == self: return True n = n.rSib return False def makeSelfLeftmostChild(self): '''moves self to self.par.lChild''' lSib = self.lSib if not lSib: return lSib.rSib = self.rSib self.rSib = self.par.lChild self.par.lChild = self def _debugAttachedCorrectly(self, recurse = True): if self.par: found = False for i in iterChildren(self.par): if i == self: found = True assert(found) for i in iterChildren(self): if i.par != self: msg = 'Node %d is the subtree %s.\nThis node has a child, %d, that is the subtree %s whose par reference is not Node%d' %( self._uniqueNum, self.newickSubTree, i._uniqueNum, i.newickSubTree, self._uniqueNum) raise AssertionError, msg if recurse: i._debugAttachedCorrectly(True) lSib = self.lSib assert(not lSib or lSib.isSibling(self)) assert(not self.rSib or self.isSibling(self.rSib)) return True def _makeSelfRoot(self, collapseDegTwo = False, refreshSplits = False): '''Does not refresh splits or degree (unless requested). new root node will NOT be collapsed if it is degree == 2 even if collapseDegTwo is specified.''' if not self.par: if refreshSplits: self._refreshOwnSplit() return par = self.par lSib = self.getLeftSib() if not lSib: par.lChild = self.rSib else: lSib.rSib = self.rSib par._makeSelfRoot(collapseDegTwo, refreshSplits) par.edgeLength = self.edgeLength self.edgeLength = None self.rSib = None self.par = None self._addChild(par) if collapseDegTwo and par.numChildren < 2: par._removeSelfInternal() if refreshSplits: self._refreshOwnSplit() def _createDegreeTwoParent(self): '''Does not refresh splits''' nd = self.__class__() p = self.par if p: nd.par = p if p.lChild == self: p.lChild = nd else: self.lSib.rSib = nd nd.lChild = self nd.rSib = self.rSib self.par = nd self.rSib = None return nd def _stealChildren(self, children): for c in children: c.pruneSelf(False) self._addChildren(children) def _addChildren(self, children): '''Does not refresh splits or check for nodes of degree 2''' for c in children: self._addChild(c) def _addChild(self, child): '''Does not refresh splits or check for nodes of degree 2. sets child.rSib to None !!!''' child.par = self rChild = self.rChild if rChild is None: self.lChild = child else: rChild.rSib = child child.rSib = None def _getExitPreorder(self): n = self while True: if n.rSib: return n.rSib n = n.par if n is None: return None def _removeSelfInternal(self): '''Makes all of self's children, children of self's parent. Adds self's edgeLength to each. Has no effect on the root.''' if self.par is None: return if self.isLeaf(): raise ValueError, 'removeInternal called with a leaf.' lSib = self.getLeftSib() for c in iterChildren(self): c.par = self.par if (c.edgeLength is not None) and (self.edgeLength is not None): c.edgeLength += self.edgeLength if not lSib: self.par.lChild = self.lChild else: lSib.rSib = self.lChild rChild = self.getRightmostChild() rChild.rSib = self.rSib def _readInfo(self, tok, ind, tokStream): '''reads name and possibly edge length from newick token stream. returns breaking token''' if ind >= 0: self.setIndex(ind) tok = tokStream.next() first = True while str(tok) == ':': if first: self.edgeLength = float(str(tokStream.nextFloat())) first = False else: if not self.__dict__.has_key('extraInfo'): self.extraInfo = [] self.extraInfo.append(tokStream.next()) tok = tokStream.next() return tok def _getListTraversal(self, f): '''returns a list of objects created by looping from "self" using the callable "f" to advance the iteration, until None is returned''' n = self r = [] while n is not None: r.append(n) n = f(n) return r def _swapPlaces(self, other, refreshSplits = False): '''swaps parent and sibling references of self and other. Does not affect children of self or other. Does not refresh splits by default.''' lSib = self.lSib if lSib: lSib.rSib = other elif self.par: self.par.lChild = other lSib = other.lSib if lSib: lSib.rSib = self elif other.par: other.par.lChild = self self.rSib, other.rSib = other.rSib, self.rSib self.par, other.par = other.par, self.par if refreshSplits: if self.par: self.par._refreshOwnSplit() if other.par: other.par._refreshOwnSplit() def getNewickSubTree(self, taxMgr = None, edgeLenCode = TreeDisplay.withEdgeLengthsIfPresent): out = cStringIO.StringIO() self.writeNewick(out, taxMgr, edgeLenCode, False) return out.getvalue() def _treeContainsSplitNonTrivial(self, split, mask): '''Returns True if the tree that this node is a part of contains an edge that is identical to split (wrt to the bits in mask). The prefix "tree" indicates that ancestral portions of the tree are searched too.''' return self._treeFindSplitNonTrivial(split, mask) is not None rChild = property(getRightmostChild) lSib = property(getLeftSib) numChildren = property(getNumChildren) degree = property(getDegree) children = property(getChildren) newickSubTree = property(getNewickSubTree) class NodeWithSplit(Node): '''A concrete Node class. Has a split field. For leaves this field can be used to determine the index of the Leaf node indices may clash with internal node indices. splits fields have to be large enough to have a bit for each leaf in the tree, so storage for a tree is O(N^2). ''' def __init__(self): Node.__init__(self) self.split = 0L def getSplit(self): return self.split def getIndex(self): assert(self.__dict__.has_key('split') and self.split > 0) if self.__dict__.has_key('_index'): return self._index return getFirstBitAsIndex(self.split) def setIndex(self, ind): '''Does not refresh ancestors split representations.''' if self.__dict__.has_key('_index'): self._index = ind self.split = 1L << ind def getALeafInSplit(self, s): '''returns the first descendant leaf encountered whose index is on the split 's'.''' if not self.split & s: return None if self.isLeaf(): return self n = self while True: for c in iterChildren(n): if c.split & s: break assert(c.split & s) # split fields are incorrect (or isLeaf is lying) if c hasn't been set to a node that has a split intersection with s. if c.isLeaf(): return c n = c def getLowestDescendantTipIndex(self): return getFirstBitAsIndex(self.split) def _refreshOwnSplit(self): '''Refreshes this node's self.split field based on children. Not recursive. Assumes self is an internal.''' assert(self.lChild) self.split = 0L for i in iterChildren(self): self.split |= i.split def refreshSplits(self): if not self.lChild: return self.split = 0L for i in iterChildren(self): i.refreshSplits() self.split |= i.split def _treeFindSplitNonTrivial(self, split, mask): '''Returns True if self's tree has the an edge that corresponds to "split" where taxa not in "mask" are ignored. All comparisons are done assuming unrooted splits.''' if self.isLeaf(): return None refBit = getFirstBitOnly(mask) split = orientSplit(split, mask, refBit) & mask cmpCode, dir = compareFirstToOrientSplit(self.split, split, mask, refBit) if cmpCode == SplitCompatEnum.identical: return self if cmpCode == SplitCompatEnum.incompat: return None if dir < 0: # compareFirstToOrientSplit returns a tuple with a negative number if we need to move toward the ancestor to find # the split assert(not self.isRoot()) nd = self while dir < 0: prevNd = nd nd = prevNd.par if nd.isRoot(): break cmpCode, dir = compareFirstToOrientSplit(nd.split, split, mask, refBit) if cmpCode == SplitCompatEnum.identical: return nd if cmpCode == SplitCompatEnum.incompat: return None for currChild in iterChildren(nd): if currChild is not prevNd: cmpCode, dir = compareFirstToOrientSplit(currChild.split, split, mask, refBit) if cmpCode == SplitCompatEnum.identical: return currChild if cmpCode == SplitCompatEnum.incompat: return None if dir > 0: return currChild._descendantFindSplitNonTrivial(split, mask, refBit) return None return self._descendantFindSplitNonTrivial(split, mask) def _descendantFindSplitNonTrivial(self, split, mask, refBit): '''Checks descendants for the "split" (does not check self.split). Intended as a helper function to _treeFindSplitNonTrivial assumes that split has been oriented.''' nd = self while True: dir = -1 for child in iterChildren(nd): cmpCode, dir = compareFirstToOrientSplit(child.split, split, mask, refBit) if cmpCode == SplitCompatEnum.identical: return child if cmpCode == SplitCompatEnum.incompat: return None if dir > 0: break if dir < 0: return None nd = child def addCompatibleSplit(self, split, mask): '''Tries to create the split in the self subtree. Split is assumed to be oriented correctly.''' cmpCode, extras = compareSplits(self.split, split, mask, orientSplits=False) if cmpCode == SplitCompatEnum.identical: return True if cmpCode == SplitCompatEnum.incompat: raise ValueError, 'incompatible split' if extras < 0: return False # if compareSplits then we need to move toward the parent to find some of the leaves in the split for c in self.children: if c.addCompatibleSplit(split, mask): return True # if we get here, the split could not be added to any of the subtrees our children # and the split was not compatible with any of them. # This means that we can create the split as a child of self # we need to figure out which of self's children will be children of the new node toMove = [] for c in self.children: if c.split & split: toMove.append(c) assert(len(toMove) > 1) for n in toMove: n.pruneSelf(False) assert(self.lChild) # we should still have an unpruned child n = self._createChild() n._addChildren(toMove) n._refreshOwnSplit() return True _uniqueNum = property(getSplit) index = property(getIndex, setIndex) class SimpleNode(Node): '''A concrete Node class. Every instance has a unique index - unique within the scope of that type of node in that tree. Leaf node indices may clash with internal node indices. indices have to be large enough to enumerate all of the leaves in the tree, so storage for a tree is O(N log N). Split representations of the node are NOT stored. ''' def __init__(self): Node.__init__(self) self._index = -1L def getUnique(self): '''Returns isLeaf ? self.index : -id(self) (Implemented becuase the _uniqueNum property is a split for NodeWithSplit)''' return self.isLeaf() and self._index or -id(self) def setIndex(self, ind): self._index = ind def getLowestDescendantTip(self): if self.isLeaf(): return self indNdList = [(i._index, i) for i in iterTips(i)] indNdList.sort() return indNdList[0][1] def getLowestDescendantTipIndex(self): return self.getLowestDescendantTip()._index def _refreshOwnSplit(self): '''Does nothing (used if type = NodeWithSplit not SimpleNode). Assumes self is an internal.''' assert(self.lChild) return def refreshSplits(self): pass def _treeFindSplitNonTrivial(self, split, mask): raise NotImplementedError, 'SimpleNode does not implement this method (yet)' def getIndex(self): return self._index def setIndex(self, i): self._index = i index = property(getIndex, setIndex) _uniqueNum = property(getUnique) # This file is part of the PIPRes library # # The PIPRes library is free software; you can redistribute it # and/or modify it under the terms of the GNU Lesser General # Public License as published by the Free Software Foundation; # either version 2.1 of the License, or (at your option) any later # version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free # Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, # MA 02111-1307, USA