#include "phycas/phycas.h"
#if (MWERKS_LIB_BUILD)
#	pragma export on
#endif
#include "ncl/nxs_defs.hpp"
#include "ncl/nxs_token.hpp"
#include "ncl/nxs_exception.hpp"
#include "ncl/output/nxs_output.hpp"
#if (MWERKS_LIB_BUILD)
#	pragma export off
#endif

#if defined(C_FUNCS_IN_STD_NAMESPACE)
	using std::strcpy;
	using std::isgraph;
#endif
using std::string;

// Size, in characters, of NxsToken's local read buffer: the amount allocated in NxsToken::Initialize() and the amount
// requested from the input stream by each NxsToken::FillInternalBuffer() call.
const unsigned kNxsTokenInternalBufferSize = 8192;

/*--------------------------------------------------------------------------------------------------------------------------
| 	Releases what the tokenizer owns: the internal string/stream wrapper (NULL unless the NxsToken was built from a 
|	string) and, when NXS_INTERNAL_TOKEN_BUFFER is defined, the local read buffer.
*/
NxsToken::~NxsToken()
	{
	delete intStrStream;
	// Test the macro with `defined', exactly as every other NXS_INTERNAL_TOKEN_BUFFER guard in this file does, so the
	// buffer is released under the same condition that allows it to be allocated.
#	if defined(NXS_INTERNAL_TOKEN_BUFFER)
		delete [] localBuffer;
#	endif
	}

/*--------------------------------------------------------------------------------------------------------------------------
| 	Captures the current state of the tokenizer `t' (file position, line, column, look-ahead character, EOF/EOL flags 
|	and the current token) so that it can later be handed back to NxsToken::SeekTokenizerState().
|	When NXS_INTERNAL_TOKEN_BUFFER is defined the local-buffer bookkeeping of `t' is captured as well.
*/
NxsTokenizerState::NxsTokenizerState(
  const NxsToken &t)	 /* tokenizer whose state is being recorded */
  	: posInfo(t.GetFilePosition(), t.GetFileLine(), t.GetFileColumn(), t.nextCharInStream, t.AtEOF(), t.AtEOL()), 
  	token(t.GetTokenReference()) 
  	{
#	if defined(NXS_INTERNAL_TOKEN_BUFFER)
		posBeforeLastRead  = t.posInfo.posBeforeLastRead;	/* stream position at which the current buffer was read */
		posInLocalBuffer   = t.posInfo.posInLocalBuffer;
		nCharsInLocalBuffer = t.posInfo.nCharsInLocalBuffer;	/* was mistakenly copied from posInLocalBuffer */
		lastReadHitEnd = t.posInfo.lastReadHitEnd;
#	endif
  	}

/*--------------------------------------------------------------------------------------------------------------------------
| 	Builds an empty tokenizer state: zero position/line/column, a NUL look-ahead character, EOF and EOL flags off, and
|	an empty token.  When NXS_INTERNAL_TOKEN_BUFFER is defined the local-buffer bookkeeping is zeroed too.
*/
NxsTokenizerState::NxsTokenizerState()
  	: posInfo(0, 0, 0, '\0', false, false),
  	token(string())
  	{
#	if defined(NXS_INTERNAL_TOKEN_BUFFER)
		lastReadHitEnd = false;
		nCharsInLocalBuffer = 0;
		posInLocalBuffer = 0;
		posBeforeLastRead = 0;	/* offset from the beginning of the stream */
#	endif
  	}


/*--------------------------------------------------------------------------------------------------------------------------
| 	Splits `s' into tokens by running a temporary NxsToken over a copy of it, and returns the tokens in reading order.
|	One token is read with ReadToken(); further tokens are read with operator++ until the tokenizer reports EOF.
*/
VecString NxsToken::TokenizeString(const std::string &s)
	{
	const string source(s);
	NxsToken reader(source);
	VecString pieces;
	if (reader.AtEOF())
		return pieces;
	reader.ReadToken();
	pieces.push_back(reader.GetTokenReference());
	while (!reader.AtEOF())
		{
		++reader;
		pieces.push_back(reader.GetTokenReference());
		}
	return pieces;
	}
	
#if defined(NXS_INTERNAL_TOKEN_BUFFER)
	// Starts with every counter at zero and every flag (lastReadHitEnd, atEOF, atEOL) cleared.
	NxsToken::InternalPosInfo::InternalPosInfo()
		: posBeforeLastRead(0), posInLocalBuffer(0), nCharsInLocalBuffer(0), lastReadHitEnd(false),
		  fileLine(0), fileColumn(0),
		  atEOF(false), atEOL(false)
		{
		}
#endif

/*--------------------------------------------------------------------------------------------------------------------------
| 	Writes `s' followed by ncl::endl to the stream returned by 
|	NxsOutput::GetNullCheckingStream<NxsOutput::kOutputComment>().
|	NOTE(review): the name suggests that stream silently discards output when no comment stream is installed -- confirm
|	in ncl/output/nxs_output.hpp.
*/
void NxsToken::OutputComment(const string &s) const
	{
	NxsOutput::GetNullCheckingStream<NxsOutput::kOutputComment>() << s.c_str() << ncl::endl;
	}

/*--------------------------------------------------------------------------------------------------------------------------
| 	Reads one token with the hyphen temporarily treated as an ordinary character (so a leading minus sign stays attached
|	to a number), then hands the token and `dbl' to IsADouble() and returns its result.
|	The hyphen setting that was in force on entry is reinstated before returning, and also before any exception raised by
|	ReadToken() is allowed to propagate.
*/
bool NxsToken::ReadDoubleToken(double *dbl)
	{
	const bool mustRestoreHyphen = tokenStateHyphenIsPunc;
	if (!mustRestoreHyphen)
		{
		// hyphen already is not punctuation; nothing to toggle, exceptions simply propagate
		ReadToken();
		return IsADouble(GetTokenReference(), dbl);
		}

	AlterTokenReading(kHyphenNotPunctuation);
	try	{
		ReadToken();
		}
	catch (...)
		{
		AlterTokenReading(kHyphenIsPunctuation);
		throw;
		}
	AlterTokenReading(kHyphenIsPunctuation);
	return IsADouble(GetTokenReference(), dbl);
	}


#if defined(NXS_INTERNAL_TOKEN_BUFFER)
	// Refills localBuffer from inputStream.  Records the stream position before the read, rewinds the buffer cursor to 0,
	// and notes whether the read came back short (which is taken to mean the end of the stream was reached).
	// Once a short read has been seen no further reads are attempted and the buffer is reported as empty.
	void NxsToken::FillInternalBuffer()
		{
		posInfo.posBeforeLastRead = inputStream.tellg();
		posInfo.posInLocalBuffer = 0;
		if (posInfo.lastReadHitEnd)
			{
			posInfo.nCharsInLocalBuffer = 0;
			return;
			}
		inputStream.read(localBuffer, kNxsTokenInternalBufferSize);
		posInfo.nCharsInLocalBuffer = inputStream.gcount();
		posInfo.lastReadHitEnd = (posInfo.nCharsInLocalBuffer < kNxsTokenInternalBufferSize);
		}
#endif

/*--------------------------------------------------------------------------------------------------------------------------
| 	Shared tail of the constructors: installs the default punctuation tables, resets the line/column/EOF/EOL 
|	bookkeeping, prepares the input source (local buffer or stream position, depending on NXS_INTERNAL_TOKEN_BUFFER) and
|	primes nextCharInStream with the first character of the input.
*/
void NxsToken::Initialize()
	{
	//	Characters that form a token all by themselves.  The hyphen sits at index 17; index 18 is the spare slot used by
	//	AlterTokenReading() and index 19 is kept NUL so the string stays terminated when both optional slots are in use.
	//
	strcpy(currentSingleTokenStr,"(){}\"]/\\,;:=*`+<>-");
	currentSingleTokenStr[19]= '\0';

	//	Characters that end the token being read.  The hyphen sits at index 21 and is toggled by AlterTokenReading().
	//
	strcpy(currentTokenBreakerStr," \t\n;\'()]{}/\\,:=*\"`+<>-");

#	if defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS)
		labileFlags = 0;
		special = '\0';
		saved = '\0';
#	endif

	posInfo.atEOL = false;
	posInfo.atEOF = false;
	posInfo.fileColumn = 1L;
	posInfo.fileLine = 1L;

#	if defined(NXS_INTERNAL_TOKEN_BUFFER)
		if (localBuffer == NULL)
			localBuffer = new char [kNxsTokenInternalBufferSize];
		this->FillInternalBuffer();
#	else
		posInfo.filePos = inputStream.tellg();
#	endif

	//	AdvanceToNextCharInStream() is a no-op once nextCharInStream is EOF, so seed it with any other value before
	//	fetching the first real character.
	//
	nextCharInStream = 'a';
	AdvanceToNextCharInStream();
	}

/*--------------------------------------------------------------------------------------------------------------------------
| 	Creates a NxsToken that reads from the caller-supplied stream `i'.  No internal string stream is created 
|	(intStrStream stays NULL).  EOF is allowed, newline is whitespace and the hyphen is punctuation by default.
*/
NxsToken::NxsToken(
  std::istream & i)
  : intStrStream(NULL),
# if defined(NXS_INTERNAL_TOKEN_BUFFER)
	localBuffer(NULL),
# endif
	inputStream(i),
	eofAllowed(true),
	newLineCheck('\n'),
	tokenStateHyphenIsPunc(true)
	{
	Initialize();
	}

/*--------------------------------------------------------------------------------------------------------------------------
| 	Creates a NxsToken that reads from the string `s'.  The string is wrapped in an InternalStringAndStream owned by this
|	object (released in the destructor) and the tokenizer reads from that wrapper's stream.
|	EOF is allowed, newline is whitespace and the hyphen is punctuation by default.
*/
NxsToken::NxsToken(const string & s)
  : intStrStream(new InternalStringAndStream(s)),
# if defined(NXS_INTERNAL_TOKEN_BUFFER)
	localBuffer(NULL),
# endif
	inputStream(intStrStream->strStream),
	eofAllowed(true),
	newLineCheck('\n'),
	tokenStateHyphenIsPunc(true)
	{
	Initialize();
	}

/*--------------------------------------------------------------------------------------------------------------------------
| 	Advances the input by one logical character and stores it in nextCharInStream.  The three ways of ending a line
|	(CR, LF and CR immediately followed by LF) are each reported as a single '\n'.
|	Does nothing once nextCharInStream is EOF.
|	When NXS_INTERNAL_TOKEN_BUFFER is defined characters are taken from localBuffer, which is refilled on demand; 
|	otherwise they are taken directly from the stream buffer of inputStream.
*/	
inline void NxsToken::AdvanceToNextCharInStream()
	{
	if (nextCharInStream == EOF)
		return;
#	if defined(NXS_INTERNAL_TOKEN_BUFFER)
		if (posInfo.posInLocalBuffer == posInfo.nCharsInLocalBuffer)
			{
			if (!posInfo.lastReadHitEnd)
				this->FillInternalBuffer();
			// Still nothing to consume: either the stream was already exhausted, or the refill produced no characters
			// (e.g. the stream length is an exact multiple of the buffer size).  Report EOF instead of reading stale
			// buffer contents.
			if (posInfo.posInLocalBuffer == posInfo.nCharsInLocalBuffer)
				{
				nextCharInStream = EOF;
				return;
				}
			}
		// Classify the raw character BEFORE normalising it; testing for CR after it has been replaced by '\n' can never
		// succeed and would turn every CR+LF pair into two newlines.
		const char rawChar = this->localBuffer[posInfo.posInLocalBuffer++];
		if (rawChar == 13)
			{
			nextCharInStream = '\n';
			if (posInfo.posInLocalBuffer == posInfo.nCharsInLocalBuffer)
				{
				// the CR was the last buffered character; the LF (if any) is in the next chunk
				this->FillInternalBuffer();
				if (posInfo.nCharsInLocalBuffer == 0)
					return;
				}
			if (this->localBuffer[posInfo.posInLocalBuffer] == 10)	//peeks at the next char
				posInfo.posInLocalBuffer++;
			}
		else if (rawChar == 10)
			nextCharInStream = '\n';
		else
			nextCharInStream = rawChar;
#	else
		nextCharInStream  = (char) (inputStream.rdbuf())->sbumpc();
		if (nextCharInStream == 13 || nextCharInStream == 10)
			{
			if(nextCharInStream == 13)
				{
				if ((inputStream.rdbuf())->sgetc() == 10)	//peeks at the next char
					(inputStream.rdbuf())->sbumpc();
				}
			nextCharInStream = '\n';
			}
#	endif
	}

/*--------------------------------------------------------------------------------------------------------------------------
|	Returns the character that was waiting in nextCharInStream and advances the look-ahead by calling 
|	AdvanceToNextCharInStream().  Also maintains fileLine, fileColumn, atEOL and atEOF.
|	At end of input atEOF is set; '\0' is returned if eofAllowed is true, otherwise NxsX_UnexpectedEOF is thrown.
|
|	The position is tracked by counting characters rather than by calling tellg() on the stream, which seemed slow and 
|	unnecessary.  If tellg() is ever reinstated, remember that a \13\10 line ending consumes two stream characters.
*/
inline char NxsToken::ReadNextChar()
	{
	const char current = nextCharInStream;
	AdvanceToNextCharInStream();

	if (current == EOF)
		{
		posInfo.atEOF = true;
		if (!eofAllowed)
			throw NxsX_UnexpectedEOF(*this);
		return '\0';
		}

	if (current == '\n')
		{
		posInfo.atEOL = true;
		posInfo.fileColumn = 1L;
		posInfo.fileLine++;
		return '\n';
		}

	posInfo.atEOL = false;
	if (current != '\t')
		posInfo.fileColumn++;
	else
		posInfo.fileColumn += 4 - ((posInfo.fileColumn - 1)%4);	//@assumes that tab will be 4 in the editor we use
	return current;
	}

/*--------------------------------------------------------------------------------------------------------------------------
|	Reads the program identifier of a command comment; called by ReadComment() after it has consumed the leading "&&".
|	Leading non-printing characters are skipped, then printing characters are collected until a non-printing character 
|	is read or the closing ']' is the next character in the stream.  The ']' itself is never consumed, so the caller 
|	still sees it.  Returns the identifier (empty if none was found).
*/
string	NxsToken::ReadCommandCommentProgramIdentifier()
	{
	string s;
	char ch = ' ';
	// isgraph() requires a value representable as unsigned char, hence the casts
	while (!isgraph(static_cast<unsigned char>(ch)) && nextCharInStream != ']')
		ch = ReadNextChar();
	while (isgraph(static_cast<unsigned char>(ch)))
		{
		// Store the character first and only then look at what follows; checking for ']' before storing would drop
		// the last character of an identifier that directly abuts the closing bracket.
		s << ch;
		if (nextCharInStream == ']')
			break;
		ch = ReadNextChar();
		}
	return s;
	}
	
/*--------------------------------------------------------------------------------------------------------------------------
|	Reads the remainder of a comment; called after the opening '[' has been consumed.  Nested brackets are balanced.
|	A NxsComment holding the comment body, the current token length, the modifier character (if the first character is 
|	a comment modifier) and the program identifier (for "&&" comments) is appended to `comments', and the index of that
|	new entry is returned.  Comments whose modifier is '!' are also passed to OutputComment().
|	EOF is not allowed inside a comment: NxsX_UnexpectedEOF is rethrown with " in a comment" appended to its message.
|	The eofAllowed setting in force on entry is restored on every exit path.
*/
unsigned NxsToken::ReadComment()
	{
	const bool prevAllowEOF = eofAllowed;
	SetEOFAllowed(false);
	const unsigned retVal = (unsigned)comments.size();
	try	{
		string progSpec;
		string commentBody;
		char modChar = '\0';
		// see if first character is the output comment symbol ('!')
		// or command comment symbol (&)
		char ch = ReadNextChar();
		if(IsCommentModifier(ch))
			{
			modChar = ch;
			if (ch == '&' && nextCharInStream == '&')
				{
				ReadNextChar();
				progSpec = ReadCommandCommentProgramIdentifier();
				}
			ch = ReadNextChar();
			}
		for(int level = 1;; ch = ReadNextChar())	
			{
			if(ch == ']')
				{
				--level;
				if(level == 0)
					break;
				}
			else if(ch == '[')
				++level;
			commentBody << ch;
			}
		comments.push_back(NxsComment(commentBody, GetTokenLength(), modChar, progSpec));
	  	if(modChar == '!') 
			OutputComment(commentBody);
		}
	catch (NxsX_UnexpectedEOF & x)
		{
		SetEOFAllowed(prevAllowEOF);
		x.msg << " in a comment";
		throw;	// rethrow the original object (already amended through the reference) rather than a copy
		}
	catch (...)
		{
		SetEOFAllowed(prevAllowEOF);
		throw;
		}
	SetEOFAllowed(prevAllowEOF);
	return retVal;
	}
	
/*--------------------------------------------------------------------------------------------------------------------------
|	Gets the remainder of a single-quoted Nexus word (the opening quote was already consumed by the caller).
|	Characters are appended to the token until an isolated single quote is met.  Two single quotes in a row stand for 
|	one literal quote character, which is stored in the token.  Underscores are preserved inside quoted tokens.
|	EOF is never allowed in the middle of a quoted token (ReadNextChar throws NxsX_UnexpectedEOF); the eofAllowed setting
|	in force on entry is restored on every exit path, including when an exception propagates.
*/
void NxsToken::ReadQuoted()
	{
	const bool prevEOFAllowed = eofAllowed;
	eofAllowed = false;
	try	{
		for(;;)
			{
			char ch = ReadNextChar();
			if (ch == '\'')
				{
				if (nextCharInStream== '\'') 
					ReadNextChar();	//skip the second '
				else
					break;	// isolated quote: end of the quoted word
				}
			AppendToToken(ch);
			}
		}
	catch (...)
		{
		eofAllowed = prevEOFAllowed;
		throw;
		}
	eofAllowed = prevEOFAllowed;
	}


/*--------------------------------------------------------------------------------------------------------------------------
|	Completes the token whose first character `ch' has already been read.
|		a character that is a token by itself is stored and nothing more is read.
|		a single quote hands over to ReadQuoted() (with NXS_THROW_IF_EMPTY_TOKENS defined, an empty result throws 
|			NxsX_EmptyToken).
|		anything else is stored and followed by every character up to the next token breaker or EOF; comments embedded
|			in the token are read with ReadComment() and underscores are stored as blanks.
*/
void NxsToken::AppendRestOfToken(
  char ch)	/* the last character read before this function was called*/
	{
	if (CharIsATokenByItself(ch))
		{
		AppendToToken(ch);
		return;
		}

	if (ch == '\'')
		{
		ReadQuoted();
#		if defined (NXS_THROW_IF_EMPTY_TOKENS)
			if (token.empty())
				throw NxsX_EmptyToken(*this);
#		endif
		return;
		}

	AppendToToken(ch);
	while (!IsTokenBreaker(nextCharInStream) && !AtEOF())
		{
		if (nextCharInStream == '[')
			{
			ReadNextChar();
			ReadComment();
			continue;
			}
		if (nextCharInStream == '_')
			{
			AppendToToken(' ');
			ReadNextChar();
			continue;
			}
		const char following = ReadNextChar();
		if (following == '\0')
			return;
		AppendToToken(following);
		}
	}
	
/*--------------------------------------------------------------------------------------------------------------------------
|	Clears the current token, reads the next complete token from the input and returns a reference to it.
|		leading whitespace and comments are skipped (ReadFirstGraphChar).
|		underscores become blanks, except inside single-quoted tokens.
|		single-quoted strings are stored without the surrounding quotes; paired single quotes become one quote.
|		characters that are tokens by themselves are returned as individual tokens.
|		if nothing but EOF is found the token is left empty.
*/
const std::string & NxsToken::ReadToken()
	{
	ResetToken();
	const char first = ReadFirstGraphChar();
	if (first == '\0')
		return GetTokenReference();
	AppendRestOfToken(first);
	return GetTokenReference();
	}

/*--------------------------------------------------------------------------------------------------------------------------
|	Clears the current token, then reads either a command comment or a token.
|	Returns false when a token was read (command comments embedded in that token may still have been stored).
|	When no token was started, returns true unless the tokenizer is at EOF.
*/
bool NxsToken::ReadCommandCommentOrToken()
	{
	ResetToken();
	const char first = ReadFirstGraphCharOrCommandComment();
	if (first == '\0')
		return !AtEOF();	// nothing appended: a bare command comment, or the end of the input
	AppendRestOfToken(first);
	return false;
	}

/*--------------------------------------------------------------------------------------------------------------------------
|	This function is very limited in usefulness.
|	Squashes together (with no whitespace, unless newline-is-token is on) all of the tokens up to the character 
|	tokenBreaker.  Intended for reading the contents of double-quoted strings, () and {}.
|	NOTE: tokenBreaker MUST already be a token-breaking character (asserted).
|	NOTE: NONE of the tokens read can be single-quoted tokens; a NxsException is thrown if one is met.
|	ALSO NOTE:  the terminating tokenBreaker is NOT added to the token.
|	Throws a NxsException if EOF is reached before tokenBreaker is found.
*/
void NxsToken::ReadAllTokensUntil(
  char tokenBreaker)
  	{
  	assert(IsTokenBreaker(tokenBreaker));
  	ResetToken();
	char ch = ReadFirstGraphChar();
	while (ch != tokenBreaker)
		{
		if (AtEOF())
			{
			string errormsg = "Unexpected end of file while searching for a closing ";
			errormsg << tokenBreaker << " character";
			throw NxsException(errormsg, *this);
			}
		if (ch == '\'')
			{
			// trailing blank added so the breaker character is not glued to the word "closing"
			string errormsg = "Unexpected single-quoted word while searching for a closing ";
			errormsg << tokenBreaker << " character";
			throw NxsException(errormsg, *this);
			}
		if (ch != '\0')	// skip the append when no character was returned
			AppendRestOfToken(ch);
		ch = ReadFirstGraphChar();	
		} 
	}
	
/*--------------------------------------------------------------------------------------------------------------------------
|	Skips whitespace and comments and returns the first character that is not whitespace, or '\0' at EOF.
|	A '[' met while skipping starts a comment, which is read with ReadComment() before the search continues.
|	An underscore is returned as a blank.
*/
char NxsToken::ReadFirstGraphChar()
	{
	if (AtEOF())
		return '\0';
	char candidate = ' ';
	for (;;)
		{
		if (nextCharInStream != '[')
			{
			candidate = ReadNextChar();
			if (AtEOF())
				return '\0';
			}
		else
			{
			ReadNextChar();
			ReadComment();
			}
		if (!IsWhitespace(candidate))
			break;
		}
	return (candidate == '_' ? ' ' : candidate);
	}
	
/*--------------------------------------------------------------------------------------------------------------------------
|	Just like ReadFirstGraphChar, except that it returns '\0' as soon as a command comment has been read.
|	Returns the first non-whitespace character otherwise ('_' is returned as a blank), or '\0' at EOF.
|	Ordinary comments followed by whitespace are flushed from `comments' because they are not adjacent to a token.
|	Several comments written back to back are all read as comments.
*/
char NxsToken::ReadFirstGraphCharOrCommandComment()
	{
	if (AtEOF())
		return '\0';
	char ch = ' ';
	do	{
		if (nextCharInStream == '[')
			{
			ReadNextChar();
			unsigned comInd = ReadComment();
			if (comments[comInd].IsCommandComment())
				return '\0';
			if (IsWhitespace(nextCharInStream))
				{
				comments.clear();//flush comments that aren't adjacent to a token;
				ReadNextChar();
				}
			else if (nextCharInStream != '[')
				{
				ch = ReadNextChar();
				if (AtEOF())
					return '\0';
				}
			// else: another comment follows immediately.  Leave ch untouched (still whitespace) so the loop goes 
			// round again and reads it as a comment, instead of handing '[' back as the first character of a token.
			}
		else 
			{
			ch = ReadNextChar();
			if (AtEOF())
				return '\0';
			}
		}
	while (IsWhitespace(ch));
	if (ch == '_')
		return ' ';
	return ch;
	}
	
/*--------------------------------------------------------------------------------------------------------------------------
|	Returns the NxsToken to the token and file position described by `tpi'.  The tokenizer state should have been 
|	obtained from this same NxsToken object.
|	With NXS_INTERNAL_TOKEN_BUFFER defined, the local buffer is reloaded from the saved stream position when the saved
|	state refers to a different chunk than the one currently buffered; NxsX_InvalidTokenizerState is thrown if the 
|	reloaded chunk is shorter than the saved buffer offset.
|	Without it, note that reported file positions in the NxsToken are actually one character off (hence the stream is 
|	backed up one character and AdvanceToNextCharInStream() is called after the file info is set).
*/
void NxsToken::SeekTokenizerState(
  const NxsTokenizerState &tpi)
	{
	using std::ios;
	token = tpi.token;
	posInfo.fileColumn = tpi.posInfo.filecol;
	posInfo.fileLine = tpi.posInfo.fileline;
	posInfo.atEOF = tpi.posInfo.atEOF;
	posInfo.atEOL = tpi.posInfo.atEOL;
#	if defined(NXS_INTERNAL_TOKEN_BUFFER)
		if (tpi.posBeforeLastRead != posInfo.posBeforeLastRead)
			{
			// The saved state refers to a different chunk of the stream: reload THAT chunk (seeking to our own current
			// position would merely re-read the chunk we already have).
			inputStream.clear();	// a short read leaves eofbit/failbit set, which would turn seekg into a no-op
			inputStream.seekg(tpi.posBeforeLastRead);
			posInfo.lastReadHitEnd = false;	// otherwise FillInternalBuffer() would skip the read entirely
			FillInternalBuffer();
			if (tpi.posInLocalBuffer > posInfo.nCharsInLocalBuffer)
				throw NxsX_InvalidTokenizerState(); //this shouldn't happen.  Should try to figure out if these cases are IO errors, or bad arguments to this function
			}
		posInfo.lastReadHitEnd = tpi.lastReadHitEnd;
		posInfo.posInLocalBuffer = tpi.posInLocalBuffer;
		nextCharInStream = tpi.posInfo.nextCharInStream;
#	else
		posInfo.filePos = tpi.posInfo.filepos;
		inputStream.seekg(posInfo.filePos);
		inputStream.rdbuf()->pubseekoff(-1, ios::cur, ios::in);
		nextCharInStream = tpi.posInfo.nextCharInStream;
		AdvanceToNextCharInStream();
#	endif

	}
	
/*--------------------------------------------------------------------------------------------------------------------------
|	Returns a copy of the stored NxsComment at position commentIndex.  Throws NxsX_InvalidCommentIndex if commentIndex 
|	is not smaller than the number of comments stored.
*/
NxsComment NxsToken::GetComment(
  unsigned commentIndex) const
	{
	if (commentIndex < comments.size())
		return comments[commentIndex];
	throw NxsX_InvalidCommentIndex();
	}

/*--------------------------------------------------------------------------------------------------------------------------
|	Returns the index of the first stored comment, at or after searchFrom, whose modifier matches commandSpec.
|	If commandSpec is '\0' every comment matches, so searchFrom itself is returned when it is a valid index.
|	Returns UINT_MAX when no such comment exists.
*/
unsigned NxsToken::GetNextCommentIndex(
  char commandSpec,
  unsigned searchFrom) const
  	{
  	const bool acceptAny = (commandSpec == '\0');
  	for (unsigned idx = searchFrom; idx < comments.size(); ++idx)
  		{
  		if (acceptAny || comments[idx].MatchesModifier(commandSpec))
  			return idx;
  		}
  	return UINT_MAX;
  	}

/*--------------------------------------------------------------------------------------------------------------------------
|	Modifies the standard tokenizing rules to make some parts of the Nexus format easier to read.
|		kNewlineIsToken / kNewlineIsNotToken switch on and off the reading of newlines as tokens (e.g. for interleaved
|			matrices).  The setting persists until the opposite flag is passed.
|		kHyphenNotPunctuation / kHyphenIsPunctuation switch off and on the treatment of '-' as punctuation, so that 
|			tokens containing a minus sign are not broken.  kHyphenIsPunctuation is the default state.
|
|	Implementation note:  the character arrays currentSingleTokenStr and currentTokenBreakerStr are searched when NxsToken
|		decides whether a character is a token by itself or breaks the current token.  The fixed part of 
|		currentSingleTokenStr occupies indices 0-16; indices 17 and 18 hold the optional '-' and '\n' (in whichever order 
|		they were enabled), packed towards index 17.  Index 21 of currentTokenBreakerStr holds the optional '-'.
|		newLineCheck is the character IsWhitespace compares against for "newline"; setting it to ' ' while newlines are
|		tokens stops '\n' from being treated as whitespace, and '\n' restores the usual behaviour.
|
 */
 void NxsToken::AlterTokenReading(
   NxsTokenFlags bit ) /* specifies what tokenizing change is being requested */
	{
	char & firstSlot = currentSingleTokenStr[17];
	char & secondSlot = currentSingleTokenStr[18];
	char & hyphenBreakerSlot = currentTokenBreakerStr[21];

	switch (bit)
		{
		case (kNewlineIsToken) :
			// append '\n' after the hyphen if the hyphen is present, otherwise put it in the first slot
			if (firstSlot == '-')
				secondSlot = '\n';
			else
				firstSlot = '\n';
			newLineCheck = ' ';
			break;

		case (kNewlineIsNotToken) :
			// drop '\n', sliding whatever followed it down into the first slot
			if (firstSlot == '\n')
				firstSlot = secondSlot;
			secondSlot = '\0';
			newLineCheck = '\n';
			break;

		case (kHyphenNotPunctuation) :
			// drop '-', sliding whatever followed it down into the first slot
			if (firstSlot == '-')
				firstSlot = secondSlot;
			secondSlot = '\0';
			hyphenBreakerSlot = '\0';
			tokenStateHyphenIsPunc = false;
			break;

		case (kHyphenIsPunctuation) :
			// append '-' after the newline if the newline is present, otherwise put it in the first slot
			if (firstSlot == '\n')
				secondSlot = '-';
			else
				firstSlot = '-';
			hyphenBreakerSlot = '-';
			tokenStateHyphenIsPunc = true;
			break;

		default :
			break;
		}
	}

/*--------------------------------------------------------------------------------------------------------------------------
|	Throws a NxsException with the message "s was expected, but current_token was entered." where s is the argument and
|	current_token is the string currently in the NxsToken.
|	Written so that ThrowIfNot() could be inlined with minimal code-bloat (we don't need to inline this function because
|	exception throwing is inherently slow).
*/
void NxsToken::ThrowUnexpectedTokenNxsException(
  const string &s) const	/* string that should be identical to the current token. */
	{
	string e;
	e << s << " was expected, but " << token << " was entered.";
	throw NxsException(e, *this);
	}


#	if defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS)
		/*----------------------------------------------------------------------------------------------------------------------
		|	Reads characters from in until a complete token has been read and stored in token. GetNextToken performs a number 
		|	of useful operations in the process of retrieving tokens:
		|~
		|	o any underscore characters encountered are stored as blank spaces (unless the labile flag bit preserveUnderscores
		|	  is set)
		|	o if the first character of the next token is an isolated single quote, then the entire quoted string is saved 
		|	  as the next token
		|	o paired single quotes are automatically converted to single quotes before being stored
		|	o comments are handled automatically (normal comments are treated as whitespace and output comments are passed to 
		|	  the function OutputComment which does nothing in the NxsToken class but can be overridden in a derived class to 
		|	  handle these in an appropriate fashion)
		|	o leading whitespace (including comments) is automatically skipped
		|	o if the end of the file is reached on reading this token, the atEOF flag is set and may be queried using the AtEOF 
		|	  member function
		|	o punctuation characters are always returned as individual tokens (see the Maddison, Swofford, and Maddison paper 
		|	  for the definition of punctuation characters) unless the flag ignorePunctuation is set in labileFlags,
		|	  in which case the normal punctuation symbols are treated just like any other darkspace character.
		|~
		|	The behavior of GetNextToken may be altered by using labile flags. For example, the labile flag saveCommandComments 
		|	can be set using the member function SetLabileFlagBit. This will cause comments of the form [&X] to be saved as 
		|	tokens (without the square brackets), but only for the acquisition of the next token. Labile flags are cleared after 
		|	each application.
		*/
		// Legacy token reader driven by labileFlags.  Clears the current token, reads the next one and returns a 
		// reference to it.  `saved' carries a single character over from one call to the next (it is set below, and
		// in OLDReadQuoted, whenever a character was read that belongs to the following token).
		const std::string & NxsToken::GetNextToken()
			{
			ResetToken();

			char ch = ' ';
			if (saved == '\0' || IsWhitespace(saved))
				{
				// Skip leading whitespace
				//
				while( IsWhitespace(ch) && !AtEOF())
					ch = ReadNextChar();
				saved = ch;
				}

			for(;;)
				{
				// Break now if singleCharacterToken mode on and token length > 0.
				//
				if (labileFlags & singleCharacterToken && token.size() > 0)
					break;

				// Get next character either from saved or from input stream.
				//
				if (saved != '\0')
					{
					ch = saved;
					saved = '\0';
					}
				else
					ch = ReadNextChar();

				// Break now if we've hit EOF.
				//
				if (AtEOF())
					break;

				// Note: parses as (ch == '\n') && (labileFlags & kNewlineIsToken).
				if (ch == '\n' && labileFlags & kNewlineIsToken)
					{
					if (token.size() > 0)
						{
						// Newline came after token, save newline until next time when it will be 
						// reported as a separate token.
						//
						posInfo.atEOL = 0;
						saved = ch;
						}
					else
						{
						// Newline is the whole token.
						posInfo.atEOL = 1;
						AppendToToken(ch);
						}
					break;
					}

				else if (IsWhitespace(ch))
					{
					// Break only if we've begun adding to token (remember, if we hit a comment before a token,
					// there might be further white space between the comment and the next token).
					//
					if (token.size() > 0)
						break;
					}

				else if (ch == '_')
					{
					// If underscores are discovered in unquoted tokens, they should be 
					// automatically converted to spaces.
					//
					if (!(labileFlags & preserveUnderscores))
						ch = ' ';
					AppendToToken(ch);
					}

				else if (ch == '[')
					{
					// Get rest of comment and deal with it, but notice that we only break if the comment ends a token,
					// not if it starts one (comment counts as whitespace). In the case of command comments 
					// (if saveCommandComments) OLDReadComment will add to the token string, causing us to break because
					// token.size() will be greater than 0.
					//
					OLDReadComment();
					if (token.size() > 0)
					break;
					}

				else if (ch == '(' && labileFlags & parentheticalToken)
					{
					AppendToToken(ch);

					// Get rest of parenthetical token.
					//
					OLDReadParentheticalToken();
					break;
					}

				else if (ch == '{' && labileFlags & curlyBracketedToken)
					{
					AppendToToken(ch);

					// Get rest of curly-bracketed token.
					//
					OLDReadCurlyBracketedToken();
					break;
					}

				else if (ch == '\"' && labileFlags & doubleQuotedToken)
					{
					// Get rest of double-quoted token (the opening double quote itself is not stored).
					//
					OLDReadDoubleQuotedToken();
					break;
					}

				else if (ch == '\'')
					{
					if (token.size() > 0)
						{
						// We've encountered a single quote after a token has
						// already begun to be read; should be another tandem
						// single quote character immediately following.
						//
						ch = ReadNextChar();
						if (ch == '\'')
							AppendToToken(ch);
						else
							{
							errormsg = "Expecting second single quote character";
							throw NxsException( errormsg, GetFilePosition(), GetFileLine(), GetFileColumn());
							}
						}
					else
						{
						// Get rest of quoted NEXUS word and break, since
						// we will have eaten one token after calling OLDReadQuoted.
						//
						OLDReadQuoted();
						}
					break;
					}

				else if (OLDIsPunctuation(ch))
					{
					if (token.size() > 0)
						{
						// If we've already begun reading the token, encountering
						// a punctuation character means we should stop, saving
						// the punctuation character for the next token.
						//
						saved = ch;
						break;
						}
					else
						{
						// If we haven't already begun reading the token, encountering
						// a punctuation character means we should stop and return
						// the punctuation character as this token (i.e., the token
						// is just the single punctuation character).
						//
						AppendToToken(ch);
						break;
						}
					}

				else
					{
					// Ordinary character: part of the token being built.
					AppendToToken(ch);
					}

				}

			// Labile flags apply to a single token only: call AlterTokenReading with the opposite setting for the
			// newline and hyphen flags if they were set, then clear every flag.
			if (labileFlags & kNewlineIsToken)
			 	AlterTokenReading(kNewlineIsNotToken);
			if (labileFlags & kHyphenNotPunctuation)
				AlterTokenReading(kHyphenIsPunctuation);
			labileFlags = 0;
			return GetTokenReference();
			}
	/*----------------------------------------------------------------------------------------------------------------------
	|	Reads rest of comment (starting '[' already input) and acts accordingly. If comment is an output comment, and if 
	|	an output stream has been attached, writes the output comment to the output stream. Otherwise, output comments are 
	|	simply ignored like regular comments. If the labileFlag bit saveCommandComments is in effect, the comment (without 
	|	the square brackets) will be stored in token. 
	*/
	void NxsToken::OLDReadComment()
		{
		// Set comment level to 1 initially.  Every ']' encountered reduces
		// level by one, so that we know we can stop when level becomes 0.
		//
		int level = 1;

		// Get first character
		//
		char ch = ReadNextChar();
		if (AtEOF())
			{
			errormsg = "Unexpected end of file inside comment";
			throw NxsException( errormsg, GetFilePosition(), GetFileLine(), GetFileColumn());
			}

		// See if first character is the output comment symbol ('!')
		// or command comment symbol (&)
		//
		int printing = 0;
		int command = 0;
		if (ch == '!')
			printing = 1;
		else if (ch == '&' && labileFlags & saveCommandComments)
			{
			command = 1;
			AppendToToken(ch);
			}
		else if (ch == ']')
			return;

		// Now read the rest of the comment
		//
		for(;;)
			{
			ch = ReadNextChar();
			if (AtEOF())
				break;

			if (ch == ']')
				--level;
			else if (ch == '[')
				++level;

			if (level == 0)
				break;

			if (printing)
				OLDAppendToComment(ch);
			else if (command)
				AppendToToken(ch);
			}

		if (printing)
			{
			// Allow output comment to be printed or displayed in most appropriate
			// manner for target operating system
			//
			OutputComment(comment);
			}
		
		// Now that we are done with it, free the memory used to store the comment
		//
		comment.clear();
		}


	/*----------------------------------------------------------------------------------------------------------------------
	|	Appends to the token the rest of a curly-bracketed phrase (the opening '{' has already been input), up to and 
	|	including the matching '}'.  Nested curly-bracketed phrases are included.  Stops early if EOF is reached.
	*/
	void NxsToken::OLDReadCurlyBracketedToken()
		{
		// depth counts unmatched '{' characters, starting with the one already consumed
		//
		int depth = 1;
		while (depth > 0)
			{
			const char c = ReadNextChar();
			if (AtEOF())
				break;

			if (c == '{')
				++depth;
			else if (c == '}')
				--depth;

			AppendToToken(c);
			}
		}

	/*----------------------------------------------------------------------------------------------------------------------
	|	Appends to the token the remainder of a double-quoted NEXUS word (the opening double quote has already been input).
	|	Reading stops at the next double quote, which is consumed but not stored, or at EOF.  Tandem double quotes are 
	|	therefore not an escape: the first one ends the word.  Single quotes receive no special treatment here and are 
	|	stored exactly as read.
	*/
	void NxsToken::OLDReadDoubleQuotedToken()
		{
		for (char c = ReadNextChar(); !AtEOF() && c != '\"'; c = ReadNextChar())
			AppendToToken(c);
		}

	/*----------------------------------------------------------------------------------------------------------------------
	|	Appends to the token the remainder of a single-quoted NEXUS word (the opening quote has already been input).
	|	A single quote is held in `saved' until the following character shows whether it was the first of a tandem pair 
	|	(stored as one quote character) or the closing quote.  In the latter case the following character stays in `saved' 
	|	so that it becomes the first character of the next token.  Stops early if EOF is reached.
	*/
	void NxsToken::OLDReadQuoted()
		{
		for(;;)
			{
			const char c = ReadNextChar();
			if (AtEOF())
				break;

			const bool isQuote = (c == '\'');
			if (saved == '\'')
				{
				if (isQuote)
					{
					// second quote of a tandem pair: store one quote character
					//
					AppendToToken(c);
					saved = '\0';
					}
				else
					{
					// the held quote was the closing one; keep this character for the next token
					//
					saved = c;
					break;
					}
				}
			else if (isQuote && saved == '\0')
				{
				// hold the quote until we know what follows it
				//
				saved = '\'';
				}
			else
				AppendToToken(c);
			}
		}

	/*----------------------------------------------------------------------------------------------------------------------
	|	Appends to the token the rest of a parenthetical phrase (the opening '(' has already been input), up to and 
	|	including the matching ')'.  Nested parenthetical phrases are included.  Stops early if EOF is reached.
	*/
	void NxsToken::OLDReadParentheticalToken()
		{
		// depth counts unmatched '(' characters, starting with the one already consumed
		//
		int depth = 1;
		while (depth > 0)
			{
			const char c = ReadNextChar();
			if (AtEOF())
				break;

			if (c == '(')
				++depth;
			else if (c == ')')
				--depth;

			AppendToToken(c);
			}
		}

#	endif	//defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS)