#!/usr/bin/python
# Copyright (c) 2005 by Mark T. Holder, Florida State University. (see end of file)
'''Parses and writes a simple form of XML for serializing PIPRes objects as XML'''
from PIPRes.util.xml_to_obj import *
from PIPRes.tree import numberedLeafTree, TreeWithSplits
from PIPRes.splits import splitRepToSplit
from xml.sax.saxutils import escape, quoteattr

# Both lookup tables are filled in at the bottom of the module, once every
# parser class has been defined and given its _CTH description.
_keywordToParser = {}
_parserToKeyword = {}

# Name of the document element; xmlToPipres() constructs its parser with it.
_ROOT_TAG = 'pipres-crude'


def _joinChildren(elements):
    '''Serializes each item of `elements` with toPipresXMLElement and joins
    the pieces with a newline plus one space.'''
    return '\n '.join(toPipresXMLElement(i) for i in elements)


def _namedContainerToXML(tag, o):
    '''Returns a `tag` element wrapping the serialized o._allElements.

    A name attribute is written only when `o` has a non-empty `name`.
    NOTE(review): the element markup was missing from the original format
    strings (they had fewer placeholders than arguments); this layout is a
    reconstruction -- confirm it against files written by older versions.'''
    name = getattr(o, 'name', None)
    if name:
        openTag = '<%s name=%s>' % (tag, quoteattr(name))
    else:
        openTag = '<%s>' % tag
    return '%s\n %s\n</%s>\n' % (openTag, _joinChildren(o._allElements), tag)


class RealizationXMLElement(SAXConstructible):
    '''Holds the results of analysis of a simulation realization.'''
    def _getPIPResTuple(self):
        '''Returns [keyword, self]; the keyword is looked up in
        _parserToKeyword, so it raises KeyError for an unregistered class.'''
        return [_parserToKeyword[self.__class__], self]


class SupportFreqXMLElement(SAXConstructible):
    '''Holds the results of analysis of a simulation realization wrt one split'''
    def _getPIPResTuple(self):
        '''Returns [keyword, self]; the keyword is looked up in
        _parserToKeyword, so it raises KeyError for an unregistered class.'''
        return [_parserToKeyword[self.__class__], self]


class StatXMLElement(SAXConstructible):
    '''Holds a float (named value) with strings for "name" and "type" to provide context.'''
    def _getPIPResTuple(self):
        '''Returns [keyword, self]; the keyword is looked up in
        _parserToKeyword, so it raises KeyError for an unregistered class.'''
        return [_parserToKeyword[self.__class__], self]

    def endSelfElement(self, name):
        '''Converts self.value to a float.

        Raises ValueError if the value is missing or is not a real number.'''
        try:
            self.value = float(self.value)
        except (TypeError, ValueError):
            # TypeError covers a value that is not a string/number (e.g. None).
            raise ValueError('Expecting a real number in the value attribute')


class TreeXMLElement(SAXConstructible):
    '''Parser for an element carrying a tree in its "newick" attribute.'''
    def _getPIPResTuple(self):
        '''Returns [keyword, tree], the tree being built by numberedLeafTree
        from self.newick.'''
        return [_parserToKeyword[TreeXMLElement], numberedLeafTree(self.newick)]

    @staticmethod
    def toXML(o):
        '''Returns an empty element whose newick attribute is o.m_newick
        without its last character (presumably the trailing ';' -- confirm).'''
        return '<%s newick=%s/>' % (_parserToKeyword[TreeXMLElement], quoteattr(o.m_newick[:-1]))


class GroupXMLElement(SAXConstructible):
    '''Parser for an element that groups other elements.'''
    def _getPIPResTuple(self):
        '''Returns [keyword, self] using the keyword registered for the
        instance's own class (subclasses report their own keyword).'''
        return [_parserToKeyword[self.__class__], self]

    @staticmethod
    def toXML(o):
        '''Returns `o` as a group element wrapping its serialized
        _allElements (see _namedContainerToXML for the layout).'''
        return _namedContainerToXML(_parserToKeyword[GroupXMLElement], o)


class SimConditionsXMLElement(GroupXMLElement):
    '''Group element that also carries the nSites, nRealizations and
    mixingProportion attributes.'''
    def endSelfElement(self, name):
        '''Converts the attribute strings to numbers.

        nSites is required and becomes a long; nRealizations becomes a long
        (0 when absent/empty); mixingProportion becomes a list of floats
        read from a whitespace-separated string ([] when absent/empty).
        Raises ValueError if nSites is missing or is not an integer.'''
        try:
            self.nSites = long(self.nSites)
        except (TypeError, ValueError):
            raise ValueError('Expecting a nSites attribute with an integer representing the number of sites to simulate')
        if self.nRealizations:
            self.nRealizations = long(self.nRealizations)
        else:
            self.nRealizations = 0
        if self.mixingProportion:
            self.mixingProportion = [float(i) for i in self.mixingProportion.split()]
        else:
            self.mixingProportion = []

    @staticmethod
    def toXML(o):
        '''Returns `o` as an element with nSites, nRealizations and
        mixingProportion attributes wrapping its serialized _allElements.

        Expects the numeric forms produced by endSelfElement.
        NOTE(review): the markup is a reconstruction (the original format
        string had a single placeholder for four arguments) -- confirm.'''
        tag = _parserToKeyword[SimConditionsXMLElement]
        # repr() keeps full float precision so the values survive a round trip.
        proportions = ' '.join([repr(i) for i in o.mixingProportion])
        contained = _joinChildren(o._allElements)
        return '<%s nSites="%d" nRealizations="%d" mixingProportion=%s>\n %s\n</%s>\n' % (
            tag, o.nSites, o.nRealizations, quoteattr(proportions), contained, tag)


class SplitXMLElement(SAXConstructible):
    '''Parser for a split given by exactly one of the attributes decimal,
    dotstar, newick or index (checked in that order).'''
    def _getPIPResTuple(self):
        '''Returns [keyword, split].

        decimal -> long(decimal); dotstar -> splitRepToSplit(dotstar);
        newick -> the single non-trivial split of the tree; index -> the
        value with only bit `index` set.
        Raises ValueError if no attribute is set or if the newick tree does
        not have exactly one non-trivial split.'''
        if self.decimal:
            split = long(self.decimal)
        elif self.dotstar:
            split = splitRepToSplit(self.dotstar)
        elif self.newick:
            t = numberedLeafTree(self.newick, treeFactory=TreeWithSplits)
            splitList = t.getSplits(includeTrivials=False)
            if len(splitList) != 1:
                raise ValueError("Expecting split's newick representation to have exactly one branch")
            split = splitList[0]
        elif self.index:
            ind = int(self.index)
            # long(1) rather than the literal 1L: same value and type under
            # Python 2 without the Python-2-only literal syntax.
            split = long(1) << ind
        else:
            raise ValueError('split element should have a attribute (%s).' % '|'.join(SplitXMLElement._CTH.attributesToRead))
        return [_parserToKeyword[SplitXMLElement], split]

    @staticmethod
    def toXML(o):
        '''Returns an empty element with the integer split `o` in its
        decimal attribute.'''
        return '<%s decimal="%ld"/>' % (_parserToKeyword[SplitXMLElement], o)


class CrudePipresXMLDocument(SAXConstructible):
    '''Document-level parser; after a successful parse, pipresObjects holds
    one [keyword, object] list per element of _allElements.'''
    def parseFileObj(self, fileObj):
        '''Parses fileObj with SAXConstructible.parseFileObj.

        Returns False if the base-class parse reports failure; otherwise
        fills self.pipresObjects and returns True.'''
        if not SAXConstructible.parseFileObj(self, fileObj):
            return False
        self._toPIPResObjects()
        return True

    def _toPIPResObjects(self):
        '''Rebuilds self.pipresObjects from self._allElements.'''
        self.pipresObjects = [i._getPIPResTuple() for i in self._allElements]

    @staticmethod
    def toXML(o):
        '''Returns `o` as a document element wrapping its serialized
        _allElements (see _namedContainerToXML for the layout).'''
        return _namedContainerToXML(_ROOT_TAG, o)

    def append(self, o):
        '''Appends `o` to self._allElements.'''
        self._allElements.append(o)


def xmlToPipres(fileObj, keys = None):
    '''Returns the PIPRes objects read from an xml file obj.

    If keys is None (or empty), returns the parser's list of
    [keyword, object] lists.
    If keys is a list of strings, returns a dict with exactly these keys,
    each mapped to the list of matching objects; a key whose type is not
    found in the xml maps to an empty list. Keys are expected to be in the
    _keywordToParser dict (this is not checked).
    Raises ValueError if the file cannot be parsed as a CrudePipresXML
    document.'''
    parser = CrudePipresXMLDocument(_ROOT_TAG, {})
    if not parser.parseFileObj(fileObj):
        raise ValueError('file is not a CrudePipresXML document')
    p = parser.pipresObjects
    if not keys:
        return p
    d = {}
    for k in keys:
        d[k] = []
    for o in p:
        typeTag = o[0]
        # d holds exactly the requested keys, so this is the same test as
        # "typeTag in keys" without rescanning the list for every object.
        if typeTag in d:
            d[typeTag].append(o[1])
    return d


def xmlFilePathToPipres(filePath, keys = None):
    '''Opens filePath, passes it to xmlToPipres and returns the result.

    The file is closed before returning, whether or not parsing succeeds.'''
    fileObj = open(filePath, 'rU')
    try:
        return xmlToPipres(fileObj, keys)
    finally:
        fileObj.close()


class _Container:
    '''container for serializing random data (using toPipresXMLByKeyword "generic" mechanism)'''
    def __init__(self, elName = 'group'):
        self._elName = elName
        self._allElements = []

    def append(self, o):
        '''Appends `o` to the contained elements.'''
        self._allElements.append(o)

    def toPipresXML(self):
        '''Serializes through the generic branch of toPipresXMLByKeyword,
        which uses self._elName as the tag.'''
        return toPipresXMLByKeyword('generic' + self._elName, self)


def toXMLGrouping(elName):
    '''Returns a new, empty _Container that serializes as element elName.'''
    return _Container(elName)


def toPipresXMLByKeyword(keyword, o):
    '''Returns the XML string for `o`, choosing the writer by `keyword`.

    A keyword registered in _keywordToParser delegates to that parser's
    toXML. A keyword starting with "generic" writes a generic element: the
    tag is the rest of the keyword (or "generic" itself when nothing
    follows), the attributes are the entries of `o` (a dict, or else
    o.__dict__) whose names do not start with "_", and the content is the
    serialized "_allElements" entry if there is one.
    Raises ValueError for any other keyword.'''
    flag = 'generic'
    p = _keywordToParser.get(keyword)
    if p:
        return p.toXML(o)
    if not keyword.startswith(flag):
        # Previously fell off the end and returned None, which only failed
        # later, inside a str.join, with an unhelpful message.
        raise ValueError('No XML writer is registered for the keyword %r' % (keyword,))
    if keyword == flag:
        tag = keyword
    else:
        tag = keyword[len(flag):]
    if isinstance(o, dict):
        d = o
    else:
        d = o.__dict__
    # Sorted so that the attribute order does not depend on dict ordering.
    l = ['%s=%s' % (k, quoteattr(str(v))) for k, v in sorted(d.items()) if not k.startswith('_')]
    allEls = d.get('_allElements')
    if allEls:
        c = _joinChildren(allEls)
    else:
        c = ''
    # NOTE(review): the closing tag was missing from the original format
    # string (three placeholders, four arguments); restored here.
    return '<%s %s>\n %s</%s>' % (tag, ' '.join(l), c, tag)


def toPipresXMLElement(obj):
    '''Returns the XML string for one item.

    A list is read as [keyword, object]: with a keyword it is written by
    toPipresXMLByKeyword; with None as the keyword the object's own
    toPipresXML() is used. Anything else is written by its toPipresXML().'''
    if isinstance(obj, list):
        if obj[0] is None:
            obj = obj[1]
        else:
            return toPipresXMLByKeyword(obj[0], obj[1])
    return obj.toPipresXML()


def toPipresXML(objs):
    '''Returns a document string containing every item of `objs`, each
    serialized by toPipresXMLElement.

    NOTE(review): the root element was missing from the original format
    string; it is restored with the name xmlToPipres() parses -- confirm.'''
    return '<%s>\n %s\n</%s>\n' % (_ROOT_TAG, _joinChildren(objs), _ROOT_TAG)


StatXMLElement._CTH = ChildrenToHandle(
    attr = [
        'type',
        'name',
        'value',
    ])
SupportFreqXMLElement._CTH = ChildrenToHandle(
    singleEl = {
        'split' : SplitXMLElement,
    },
    multiEl = {
        'stat' : StatXMLElement
    }
)
RealizationXMLElement._CTH = ChildrenToHandle(
    multiEl = {
        'supportfreq' : SupportFreqXMLElement
    }
)
SimConditionsXMLElement._CTH = ChildrenToHandle(
    attr = [
        'nSites',
        'nRealizations',
        'mixingProportion',
    ],
    multiEl = {
        'realization' : RealizationXMLElement
    }
)
TreeXMLElement._CTH = ChildrenToHandle(
    attr = [
        'newick',
    ])
SplitXMLElement._CTH = ChildrenToHandle(
    attr = [
        'index',
        'decimal',
        'dotstar',
        'newick'
    ])
CrudePipresXMLDocument._CTH = ChildrenToHandle(
    attr = [
        'name'
    ],
    multiEl = {
        'group': GroupXMLElement,
        'tree': TreeXMLElement,
        'split' : SplitXMLElement,
        'sim_conditions' : SimConditionsXMLElement,
        'simcondition' : SimConditionsXMLElement,
    })

_keywordToParser = dict(CrudePipresXMLDocument._CTH.multiElementDict)
# Two keywords share SimConditionsXMLElement. Walking the keywords in sorted
# order and keeping the first one seen makes the keyword used when writing
# deterministic instead of dependent on dict iteration order.
for k, v in sorted(_keywordToParser.items()):
    _parserToKeyword.setdefault(v, k)

# This file is part of the PIPRes library
# The PIPRes library is free software; you can redistribute it
# and/or modify it under the terms of the GNU Lesser General
# Public License as published by the Free Software Foundation;
# either version 2.1 of the License, or (at your option) any later
# version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA