#!/usr/bin/python # Copyright (c) 2005 by Mark T. Holder, Florida State University. (see end of file) import math, cStringIO '''Functions for dealing with long's that represent splits. 1's indicate taxa above the split, and 0's represent the other taxa. >>>from PIPRes.splits import * ''' from PIPRes.util.io import cipresGetLogger _LOG = cipresGetLogger('pipres.split') _firstOnBit15Tuple = (0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0) def splitRepToSplit(splitRep): '''Converts a .* style string to a long with bits representing the split. The first character represents the 1-bit. .'s are mapped to 0 bits, *'s go to 1's >>> splitRepToSplit('.*.*') 10L >>> splitRepToSplit('.*.**') 26L >>> splitRepToSplit('**.**') 27L ''' n = long(0) curr = 1L for c in splitRep: if c == '*': n |= curr elif c != '.': raise ValueError, 'split representation %s does not conform to expected pattern of . and * characters' % splitRep curr <<= 1L return n def toSplitRep(split, nTaxa): ''' converts split to string with .* to indicate taxa (inverse of splitRepToSplit) >>> toSplitRep(10L, 4) '.*.*' >>> toSplitRep(26L, 5) '.*.**' >>> toSplitRep(27L, 5) '**.**' >>> toSplitRep(27L, 6) '**.**.' ''' splitRep = cStringIO.StringIO() currBit = 1L for i in xrange(nTaxa): splitRep.write((currBit & split) and '*' or '.') currBit <<= 1 return splitRep.getvalue() def splitToNewick(split, **kwargs): ''' converts the split to a newick representation. takes a "taxLabels" list OR ("nTaxa" AND "starting") integers for numeric labels >>> splitToNewick(10L, nTaxa=4, starting=1) '(1,3,(2,4))' >>> splitToNewick(10L, nTaxa=4, starting=0) '(0,2,(1,3))' >>> splitToNewick(5L, taxLabels=['a', 'b', 'c', 'd']) '(b,d,(a,c))' ''' tl = kwargs.get('taxLabels') if tl is None: nTaxa = kwargs.get('nTaxa') starting = kwargs.get('starting') if starting is None or nTaxa is None: raise TypeError, 'Expecting taxLabels or nTaxa and starting keyword args' tl = map(str, range(starting, starting + nTaxa)) else: nTaxa = len(tl) currBit = 1L above = [] below = [] only = None for i in xrange(nTaxa): if (currBit & split): above.append(tl[i]) else: below.append(tl[i]) currBit <<= 1 if not above: only = below elif not below: only = above else: return '(%s,(%s))' %(','.join(below), ','.join(above)) return '(%s)' % ','.join(only) def getLastBitAsIndex(a): if a < 1L: raise ValueError, 'argument < 1' log2 = math.log(a)/math.log(2.0) return long(math.floor(log2)) def getFirstBitAsIndex(a): '''Return first bit set to 1 as an index (0->exception, 1->0, 2->1, 3->0, 4->2). horrendous algorithm''' global _firstOnBit15Tuple c = long(a) if c != a: raise ValueError, 'non-integer argument' if c < 1L: raise ValueError, 'argument < 1' fBit = 0 while True: i = c & 0x0FL if i: return fBit + _firstOnBit15Tuple[i] fBit += 4 c >>= 4 _nBitsSet = (0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4) def countBits(a): '''Returns the number of bits set to one ''' global _nBitsSet c = long(a) if c != a: raise ValueError, 'non-integer argument' if c < 1L: if c < 0L: raise ValueError, 'negative argument' return 0 nBits = 0 while c > 0: i = c & 0x0FL if i: nBits += _nBitsSet[i] c >>= 4 return nBits def getFirstBitOnly(a): '''returns a long with only the first bit set to 1. (0->0, 1->1, 2->2, 3->1, 4->4, 5->1) equivalent to pow(2, getFirstOnBitAsNumber(a) - 1) for a > 0''' c = long(a) if c != a: raise ValueError, 'non-integer argument' if c < 1L: if c == 0L: return 0 raise ValueError, 'negative argument' if c&1: return 1 return c&(c^(c-1)) def makeSimpleMask(nTax): if nTax < 1: raise ValueError, 'nTax must be a postive integer.' return (1L << nTax) - 1L def makeSplitFromList(nList): s = 0L for n in nList: s |= (1L << n) return s class SplitCompatEnum: '''incompat is 0, compat is 1, identical is 2. So bool(x) where x is an element of the enum works as expected.''' incompat, compat, identical, _firstIsSubset, _secondIsSubset = range(5) def splitsEqual(first, second, mask, orientSplits = True): return compareSplits(first, second, mask, orientSplits) == (SplitCompatEnum.identical, None) def compareSplits(first, second, mask, orientSplits = True): ''' returns (SplitCompatEnum.identical, None), (SplitCompatEnum.incompat, None), or (SplitCompatEnum.compat, +-splitCode) where splitCode is the indices that are present in one split interpretation, but absent in the other. The sign of splitCode indicates whether you should look in * the descendants of "first" node (if the sign is +) * the ancestors of "first" node (if the sign is -) to find a node that is identical to "second" split. ''' firstMasked = first & mask secondMasked = second & mask firstFlipped = False if orientSplits: origFirst = firstMasked firstMasked = orientSplit(firstMasked, mask) firstFlipped = (firstMasked == origFirst) secondMasked = orientSplit(secondMasked, mask) return compareOrientedSplits(firstMasked, secondMasked, mask, firstFlipped) def compareFirstToOrientSplit(fSplit, split, mask, refBit): fSplit, inverted = (fSplit & refBit) and (fSplit & mask, False) or (invertSplit(fSplit, mask), True) return compareOrientedSplits(fSplit, split, mask, inverted) def compareOrientedSplits(firstMasked, secondMasked, mask, firstWasInverted): '''See compareSplits.''' #_LOG.warn("firstMasked%d, secondMasked=%d, mask=%d, firstWasInverted=%s" %(firstMasked, secondMasked, mask, firstWasInverted and "True" or "False")) if firstMasked == secondMasked: return (SplitCompatEnum.identical, None) interSplit = firstMasked & secondMasked if interSplit: # the (1,1) configuration of bits was found xOrSplit = firstMasked ^ secondMasked firstAndXor = xOrSplit & firstMasked if firstAndXor: # first has bits that second does not have - the (1,0) configuration was found if ((firstMasked | secondMasked) != mask) and (xOrSplit & secondMasked): # there are some bits that both lack - the (0, 0) configuration of bits was found # and second has bits that the first does not - the (0, 1) configuration was found # Thus they are incompatible (1,1), (0,0), (1,0) and (0,1) configurations were all found return (SplitCompatEnum.incompat, None) # Splits are compatible, but # we need to turn off some of the bits in first if we want a match. if firstWasInverted: return (SplitCompatEnum.compat, -firstAndXor) return (SplitCompatEnum.compat, firstAndXor) #the splits are compatible, but we need first to have more bits turned on if we want a match secondAndXor = xOrSplit & secondMasked assert(secondAndXor) # this must be True or we should have returned SplitCompatEnum.identical if firstWasInverted: return (SplitCompatEnum.compat, secondAndXor) return (SplitCompatEnum.compat, -secondAndXor) #there is no split intersection, this can only happen if we are not orienting splits. return (SplitCompatEnum.compat, -secondMasked) def invertSplit(s, mask): '''complements 's' and returns its bits masked by the bit array 'mask'.''' return mask & (~s) def orientSplit(s, mask, refBit = None): '''takes a split 's', the 'mask' of included taxa, and (optionally) a reference bit (if omitted the first bit of mask will be used). Inverts the split if the refBit is not 1''' if not refBit: refBit = getFirstBitOnly(mask) elif not (refBit & mask): raise ValueError, ('Reference bit %ld not found in mask %ld' % (refBit, mask)) return (refBit & s) and s or invertSplit(s, mask) def splitToList(s, mask = -1, oneBased = False, ordinationInMask = False): return [i for i in iterSplitIndices(s, mask, oneBased, ordinationInMask)] def iterSplitIndices(s, mask = -1, oneBased = False, ordinationInMask = False): '''returns the index of each bit that is on in 's' and the mask ('oneBased' to True means that the 0x01 bit is returned as 1 instead of 0). ordinationInMask means that the indices will be the count of the 1's in the mask that are to the right of the bit (not the number of digits to the right of the bit)''' currBitIndex = oneBased and 1 or 0 testBit = 1L maskedSplitRep = s & mask standardOrdination = not ordinationInMask while testBit <= maskedSplitRep: if maskedSplitRep & testBit: yield currBitIndex if standardOrdination or (mask & testBit): currBitIndex += 1 testBit <<= 1 def iterBits(split, mask = -1): '''generator for all of the bits in a 'split' (masked by 'mask'), yields longs with only one bit set.''' testBit = 1L maskedSplitRep = split & mask while testBit <= maskedSplitRep: if maskedSplitRep & testBit: yield testBit testBit <<= 1 def isTrivialSplit(s, mask, checkInverse = True): '''returns False if s (or its complement if 'checkInverse' is True) has >1 bit of 'mask' on.''' m = mask & s if m == getFirstBitOnly(m): return True if m == 0 or m == mask: return True return checkInverse and isTrivialSplit(invertSplit(s, mask), mask, False) if __name__ == '__main__': import doctest doctest.testmod() # This file is part of the PIPRes library # # The PIPRes library is free software; you can redistribute it # and/or modify it under the terms of the GNU Lesser General # Public License as published by the Free Software Foundation; # either version 2.1 of the License, or (at your option) any later # version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free # Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, # MA 02111-1307, USA