// This file is part of BULL, a program for phylogenetic simulations
// most of the code was written by Mark T. Holder.
// It is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// Some of the code is from publicly available source by Paul Lewis, Ziheng Yang,
// John Huelsenbeck, David Swofford, and others (as noted in the code).
// In fact the main structure of the program was created by modifying Paul Lewis'
// basiccmdline.cpp from his NCL
//
// This code was used in Mark's dissertation, some changes were made in order to
// get it to compile on gcc. It is possible that this porting introduced bugs (very
// little debugging has been done on UNIX platforms). I would suggest checking
// the simulator by generating data on trees with short branches, etc.
// This file is part of BULL, a program for phylogenetic simulations
// most of the code was written by Mark T. Holder.
// It is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// Some of the code is from publicly available source by Paul Lewis, Ziheng Yang,
// John Huelsenbeck, David Swofford, and others (as noted in the code).
// In fact the main structure of the program was created by modifying Paul Lewis'
// basiccmdline.cpp from his NCL
//
// This code was used in Mark's dissertation, some changes were made in order to
// get it to compile on gcc. It is possible that this porting introduced bugs (very
// little debugging has been done on UNIX platforms). I would suggest checking
// the simulator by generating data on trees with short branches, etc.
#include "nexus_defs.hpp"
#include "xbull.hpp"
#include "nexus_token.hpp"
#include "nexus.hpp"
#include "set_reader.hpp"
/**
* @class SetReader
* @file set_reader.hpp
* @file setreader.cpp
* @author Paul O. Lewis
* @copyright Copyright © 1999. All Rights Reserved.
* @variable block [NexusBlock&] the NexusBlock used for looking up labels
* @variable max [int] maximum number of elements in the set
* @variable settype [int] the type of set being read (see enum)
* @variable nxsset [IntSet&] reference to the set being read
* @variable token [NexusToken&] the token object to use in reading the file
* @see NexusReader
* @see NexusToken
* @see XBull
*
* A class for reading Nexus set objects and storing them in a set of int values.
* The IntSet nxsset will be flushed if it is not empty, and nxsset will be built
* up as the set is read, with each element in the list storing a member of
* the set (ranges are stored as individual elements).
*
*
* This class handles set descriptions of the following form:
*
* 4-7 15 20-.\3;
*
* The above set includes all numbers from 4 to 7 (inclusive) as well as
* 15 and every third number from 20 to max. If max were 30, the members
* specified (numbered as in the data file, i.e. starting at 1) would be:
*
* 4 | 5 | 6 | 7 | 15 | 20 |
* 23 | 26 | 29 |
*
* Each member is stored in nxsset with 1 subtracted (see AddRange).
*
*/
/**
* @enumeration
* @enumitem charset [1] means expect a character set
* @enumitem taxset [2] means expect a taxon set
*
* For use with the variable settype. Default is 1 (charset).
*/
/**
* @constructor
* @param t [NexusToken&] bound to the member token
* @param maxValue [int] copied into the member max
* @param iset [IntSet&] bound to the member nxsset
* @param nxsblk [NexusBlock&] bound to the member block
* @param type [int] copied into the member settype
*
* Binds the reference members and copies the scalar arguments, then makes
* sure nxsset starts out empty by erasing any elements it already holds.
*/
SetReader::SetReader( NexusToken& t, int maxValue, IntSet& iset, NexusBlock& nxsblk
	, int type /* = 1 */ )
	: token(t), nxsset(iset), max(maxValue), block(nxsblk), settype(type)
{
	// Nothing to discard when the caller handed us an empty set.
	if ( nxsset.empty() )
		return;

	nxsset.erase( nxsset.begin(), nxsset.end() );
}
/**
* @method AddRange [bool:protected]
* @param first [int] the first member of the range (inclusive)
* @param last [int] the last member of the range (inclusive)
* @param modulus [int] the modulus to use (only applied when greater than zero)
*
* Inserts the members of the range first..last into nxsset. When modulus
* is positive, only first, first+modulus, first+2*modulus, ... are kept;
* otherwise every member of the range is inserted. The arguments use the
* numbering of the data file, [1..max]; each value is stored with 1
* subtracted from it.
*
* Returns false (and inserts nothing) unless 1 <= first <= last <= max,
* true otherwise.
*/
bool SetReader::AddRange( int first, int last, int modulus /* = 0 */ )
{
	const bool withinBounds = ( last <= max && first >= 1 && first <= last );
	if ( !withinBounds )
		return false;

	const bool useModulus = ( modulus > 0 );
	const int span = last - first;	// non-negative: first <= last was checked above

	// offset is the distance from the start of the range; the modulus test
	// is made on that distance so that first itself is always included.
	for ( int offset = 0; offset <= span; ++offset ) {
		if ( !useModulus || offset % modulus == 0 )
			nxsset.insert( first - 1 + offset );
	}
	return true;
}
/**
* Helper for SetReader::GetTokenValue.
*
* Stores the value of text in value and returns true only when the whole
* of text is a base-10 integer that survives conversion to int. Returns
* false and leaves value untouched otherwise (empty text, trailing
* non-digit characters such as in "12S", or a value too large for int).
*/
static bool ParseSetElementNumber( const char* text, int& value )
{
	char* end = 0;
	const long parsed = strtol( text, &end, 10 );

	// end == text: no digits consumed; *end != '\0': junk after the digits.
	if ( end == text || *end != '\0' )
		return false;

	// Reject values that do not round-trip through int instead of letting
	// them wrap to some unrelated (possibly valid-looking) number.
	const int narrowed = static_cast<int>( parsed );
	if ( static_cast<long>( narrowed ) != parsed )
		return false;

	value = narrowed;
	return true;
}
/**
* @method GetTokenValue [int:private]
* @throws XBull
*
* Tries to interpret the current token as a number. The entire token must
* be numeric: a token that merely starts with digits (e.g. "12S") is not
* treated as the number 12 but is passed on to the label lookup. Failing
* that (or if the number is 0), tries to interpret the token as a character
* label (settype == charset) or a taxon label (otherwise), which it then
* converts to a number. If that lookup also yields 0, block.errormsg is
* filled in and an XBull exception is thrown.
*/
int SetReader::GetTokenValue()
{
	// v stays 0 when the token is not a clean integer, which routes it
	// to the label lookup below. The temporary returned by GetToken()
	// lives until the end of the full expression, so the pointer passed
	// to the helper is valid for the duration of the call.
	int v = 0;
	ParseSetElementNumber( token.GetToken().c_str(), v );

	if ( v == 0 )
	{
		if ( settype == SetReader::charset )
			v = block.CharLabelToNumber( token.GetToken() );
		else
			v = block.TaxonLabelToNumber( token.GetToken() );
	}

	if ( v == 0 )
	{
		block.errormsg = "Set element (";
		block.errormsg += token.GetToken();
		block.errormsg += ") not a number and not a valid ";
		if ( settype == SetReader::charset )
			block.errormsg += "character";
		else
			block.errormsg += "taxon";
		block.errormsg += " label";
		throw XBull( block.errormsg, token);
	}
	return v;
}
/**
* @method Run [void:public]
* @throws XBull
*
* Reads in a set from a NEXUS data file, consuming tokens up to and
* including the terminating semicolon. Members are added to nxsset
* through AddRange. A modulus given with '\' applies only to the range
* it follows; it is cleared once that range has been added, so later
* ranges in the same set definition are not thinned by it.
*
* Throws XBull (after filling in block.errormsg) when '-', '.' or '\'
* appears out of place, when the modulus is not greater than 0, when
* AddRange rejects a range, or when GetTokenValue cannot interpret a token.
*/
void SetReader::Run()
{
	bool ok;
	int rangeBegin = -1;		// -1 means "no value read yet"
	int rangeEnd = rangeBegin;	// -1 means "end of current range not read yet"
	bool insideRange = false;	// true once a '-' has been seen
	int modValue = 0;		// 0 means "no modulus"; set by '\'

	for (;;)
	{
		// next token should be one of the following:
		// ';' --> set definition finished
		// '-' --> range being defined
		// int --> member of set (or beginning or end of a range)
		// '.' --> signifies the number max
		// '\' --> signifies modulus value coming next
		//
		token.GetNextToken();

		if ( token.Equals("-") )
		{
			// We should not be inside a range when we encounter a hyphenation symbol.
			// The hyphen is what _puts_ us inside a range!
			if ( insideRange ) {
				block.errormsg = "The symbol '-' is out of place here";
				throw XBull( block.errormsg, token);
			}
			insideRange = true;
		}
		else if ( token.Equals(".") )
		{
			// We _should_ be inside a range if we encounter a period, as this
			// is a range termination character
			if ( !insideRange ) {
				block.errormsg = "The symbol '.' can only be used to specify the end of a range";
				throw XBull( block.errormsg, token);
			}
			rangeEnd = max;
		}
		else if ( token.Equals("\\") )
		{
			// The backslash character is used to specify a modulus to a range, and
			// thus should only be encountered if currently inside a range
			if ( !insideRange ) {
				block.errormsg = "The symbol '\\' can only be used after the end of a range has been specified";
				throw XBull( block.errormsg, token);
			}
			// The modulus itself is the token that follows the backslash.
			// NOTE(review): atoi accepts a numeric prefix, so "3x" is read as 3.
			token.GetNextToken();
			modValue = atoi( token.GetToken().c_str() );
			if ( modValue <= 0 ) {
				block.errormsg = "The modulus value specified (";
				block.errormsg += token.GetToken();
				block.errormsg += ") is invalid; must be greater than 0";
				throw XBull( block.errormsg, token);
			}
		}
		else if ( insideRange && rangeEnd == -1 )
		{
			// The beginning of the range and the hyphen symbol have been read
			// already, just need to store the end of the range at this point
			rangeEnd = GetTokenValue();
		}
		else if ( insideRange )
		{
			// If insideRange is true, we must have already stored the beginning
			// of the range and read in the hyphen character. We would not have
			// made it this far if we had also not already stored the range end.
			// Thus, we can go ahead and add the range.
			ok = AddRange( rangeBegin, rangeEnd, modValue );

			// The modulus belongs to the range just added only. Without this
			// reset, "1-10\3 20-30" would apply the modulus 3 to 20-30 as well.
			modValue = 0;

			if ( !ok ) {
				block.errormsg = "Character number out of range (or range incorrectly specified) in set specification";
				throw XBull( block.errormsg, token);
			}

			// We have actually already read in the next token, so deal with it
			// now so that we don't end up skipping a token
			if ( token.Equals(";") )
				break;
			rangeBegin = GetTokenValue();
			rangeEnd = -1;
			insideRange = false;
		}
		else if ( rangeBegin != -1 )
		{
			// If we were inside a range, we would have not gotten this far.
			// If not in a range, we are either getting ready to begin a new
			// range or have previously read in a single value. Handle the
			// latter possibility here.
			ok = AddRange( rangeBegin, rangeBegin, modValue );
			if ( !ok ) {
				block.errormsg = "Character number out of range (or range incorrectly specified) in set specification";
				throw XBull( block.errormsg, token);
			}

			// The current token has not been handled yet: it either ends the
			// set or is the next single value / start of the next range.
			if ( token.Equals(";") )
				break;
			rangeBegin = GetTokenValue();
			rangeEnd = -1;
		}
		else if ( token.Equals(";") )
			break;
		else if ( token.Equals("ALL") ) {
			// Whole range 1..max requested. The result of AddRange is not
			// checked here; rangeBegin is left at 1, so the token that follows
			// is handled by the single-value branch above.
			rangeBegin = 1;
			rangeEnd = max;
			ok = AddRange( rangeBegin, rangeEnd );
		}
		else {
			// Can only get here if rangeBegin still equals -1 and thus we
			// are reading in the very first token and that token is neither
			// the word "all" nor is it a semicolon
			rangeBegin = GetTokenValue();
			rangeEnd = -1;
		}
	}
}