// This file is part of BULL, a program for phylogenetic simulations // most of the code was written by Mark T. Holder. // It is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // Some of the code is from publically available source by Paul Lewis, Ziheng Yang, // John Huelsenbeck, David Swofford , and others (as noted in the code). // In fact the main structure of the program was created by modifying Paul Lewis' // basiccmdline.cpp from his NCL // // This code was used in Mark's dissertation, some changes were made in order to // get it to compile on gcc. It is possible that this porting introduced bugs (very // little debugging has been done on UNIX platforms). I would suggest checking // the simulator by generating data on trees with short branches, etc. // This file is part of BULL, a program for phylogenetic simulations // most of the code was written by Mark T. Holder. // It is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // Some of the code is from publically available source by Paul Lewis, Ziheng Yang, // John Huelsenbeck, David Swofford , and others (as noted in the code). // In fact the main structure of the program was created by modifying Paul Lewis' // basiccmdline.cpp from his NCL // // This code was used in Mark's dissertation, some changes were made in order to // get it to compile on gcc. It is possible that this porting introduced bugs (very // little debugging has been done on UNIX platforms). I would suggest checking // the simulator by generating data on trees with short branches, etc. #include "nexus_defs.hpp" #include "xbull.hpp" #include "nexus_token.hpp" #include "nexus.hpp" #include "set_reader.hpp" /** * @class SetReader * @file set_reader.hpp * @file setreader.cpp * @author Paul O. 
Lewis * @copyright Copyright © 1999. All Rights Reserved. * @variable block [NexusBlock&] the NexusBlock used for looking up labels * @variable max [int] maximum number of elements in the set * @variable settype [int] the type of set being read (see enum) * @variable nxsset [IntSet&] reference to the set being read * @variable token [NexusToken&] the token object to use in reading the file * @see NexusReader * @see NexusToken * @see XBull * * A class for reading Nexus set objects and storing them in a set of int values. * The IntSet nxsset will be flushed if it is not empty, and nxsset will be built * up as the set is read, with each element in the list storing a member of * the set (ranges are stored as individual elements). * *

This class handles set descriptions of the following form: *

 * 4-7 15 20-.\3;
 * 
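* The above set includes all numbers from 4 to 7 (inclusive) as well as
* 15 and every third number from 20 to max. If max were 30, the set would
* contain the following members (shown with the 1-based numbering used in
* the data file; AddRange stores each value minus 1 in nxsset):
*
* 4 5 6 7 15 20 23 26 29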
*/ /** * @enumeration * @enumitem charset [1] means expect a character set * @enumitem taxsert [2] means expect a taxon set * * For use with the variable settype. Default is 1 (charset). */ /** * @constructor * * Initializes token to t and nxsset to iset, then * erases nxsset (if it is nonempty). */ SetReader::SetReader( NexusToken& t, int maxValue, IntSet& iset, NexusBlock& nxsblk , int type /* = 1 */ ) : token(t), nxsset(iset), max(maxValue), block(nxsblk), settype(type) { if ( !nxsset.empty() ) nxsset.erase( nxsset.begin(), nxsset.end() ); } /** * @method AddRange [bool:protected] * @param first [int] the first member of the range (inclusive) * @param last [int] the last member of the range (inclusive) * @param modulus [int] the modulus to use (if non-zero) * * Adds the range specified by first, last, and modulus to the set. If * modulus is zero (the default value) it is ignored. The parameters * first and last are from the data file and thus have range [1..max]. * We store them with offset = 0 in nxsset (i.e., subtract 1 from every * value stored). */ bool SetReader::AddRange( int first, int last, int modulus /* = 0 */ ) { if ( last > max || first < 1 || first > last ) return false; for ( int i = first-1; i < last; i++ ) { int diff = i-first+1; if ( modulus > 0 && diff % modulus != 0 ) continue; nxsset.insert(i); } return true; } /** * @method GetTokenValue [int:private] * @throws XBull * * Tries to interpret token as a number. Failing that, * tries to interpret token as a character or taxon * label, which it then converts to a number. Failing * that, it throws an XBull exception. 
*/ int SetReader::GetTokenValue() { int v = atoi( token.GetToken().c_str() ); if ( v == 0 ) { if ( settype == SetReader::charset ) v = block.CharLabelToNumber( token.GetToken() ); else v = block.TaxonLabelToNumber( token.GetToken() ); } if ( v == 0 ) { block.errormsg = "Set element ("; block.errormsg += token.GetToken(); block.errormsg += ") not a number and not a valid "; if ( settype == SetReader::charset ) block.errormsg += "character"; else block.errormsg += "taxon"; block.errormsg += " label"; throw XBull( block.errormsg, token); } return v; } /** * @method Run [void:public] * @throws XBull * * Reads in a set from a NEXUS data file. */ void SetReader::Run() { bool ok; int rangeBegin = -1; int rangeEnd = rangeBegin; bool insideRange = false; int modValue = 0; for (;;) { // next token should be one of the following: // ';' --> set definition finished // '-' --> range being defined // int --> member of set (or beginning or end of a range) // '.' --> signifies the number max // '\' --> signifies modulus value coming next // token.GetNextToken(); if ( token.Equals("-") ) { // We should not be inside a range when we encounter a hyphenation symbol. // The hyphen is what _puts_ us inside a range! if ( insideRange ) { block.errormsg = "The symbol '-' is out of place here"; throw XBull( block.errormsg, token); } insideRange = true; } else if ( token.Equals(".") ) { // We _should_ be inside a range if we encounter a period, as this // is a range termination character if ( !insideRange ) { block.errormsg = "The symbol '.' 
can only be used to specify the end of a range"; throw XBull( block.errormsg, token); } rangeEnd = max; } else if ( token.Equals("\\") ) { // The backslash character is used to specify a modulus to a range, and // thus should only be encountered if currently inside a range if ( !insideRange ) { block.errormsg = "The symbol '\\' can only be used after the end of a range has been specified"; throw XBull( block.errormsg, token); } token.GetNextToken(); modValue = atoi( token.GetToken().c_str() ); if ( modValue <= 0 ) { block.errormsg = "The modulus value specified ("; block.errormsg += token.GetToken(); block.errormsg += ") is invalid; must be greater than 0"; throw XBull( block.errormsg, token); } } else if ( insideRange && rangeEnd == -1 ) { // The beginning of the range and the hyphen symbol have been read // already, just need to store the end of the range at this point rangeEnd = GetTokenValue(); } else if ( insideRange ) { // If insideRange is true, we must have already stored the beginning // of the range and read in the hyphen character. We would not have // made it this far if we had also not already stored the range end. // Thus, we can go ahead and add the range. ok = AddRange( rangeBegin, rangeEnd, modValue ); if ( !ok ) { block.errormsg = "Character number out of range (or range incorrectly specified) in set specification"; throw XBull( block.errormsg, token); } // We have actually already read in the next token, so deal with it // now so that we don't end up skipping a token if ( token.Equals(";") ) break; rangeBegin = GetTokenValue(); rangeEnd = -1; insideRange = false; } else if ( rangeBegin != -1 ) { // If we were inside a range, we would have not gotten this far. // If not in a range, we are either getting ready to begin a new // range or have previously read in a single value. Handle the // latter possibility here. 
ok = AddRange( rangeBegin, rangeBegin, modValue ); if ( !ok ) { block.errormsg = "Character number out of range (or range incorrectly specified) in set specification"; throw XBull( block.errormsg, token); } if ( token.Equals(";") ) break; rangeBegin = GetTokenValue(); rangeEnd = -1; } else if ( token.Equals(";") ) break; else if ( token.Equals("ALL") ) { rangeBegin = 1; rangeEnd = max; ok = AddRange( rangeBegin, rangeEnd ); } else { // Can only get here if rangeBegin still equals -1 and thus we // are reading in the very first token and that token is neither // the word "all" nor is it a semicolon rangeBegin = GetTokenValue(); rangeEnd = -1; } } }