#ifndef NCL_NXSTOKEN_H
#define NCL_NXSTOKEN_H

#include "phycas/phycas.h"
#include <cstring>
#include <istream>
#include <string>
#include <vector>
#include "ncl/nxs_defs.hpp"
#include "ncl/misc/string_extensions.hpp"
#if defined(NCL_USING_SSTREAM)
#   include <sstream>
#else
#   include <strstream>
#endif

// April 2003 changes:
//  Correctly parses embedded comments: test[embedded]comment as one token "testcomment"
//  Parsing 2X-3X faster (on metrowerks compiles, at least)
//  breaks token at ' : test'string' is now two tokens "test" and "string" previously had been an error
//  comments that are contiguous (or embedded) within a token are now stored until the token is advanced. This
//      will be useful if one supports "marking up" tokens with formatting comments
//  To speed up ReadToken several interface changes had to be made:
//      SetLabileFlagBit() has been replaced with AlterTokenReading which is much more restrictive (only affects handling of newlines and hyphens)
//      Note that unlike SetLabileFlagBit, the fields changed by a call to AlterTokenReading remain changed until a subsequent call to function
//      with the opposite argument turns them off.
// Instead of setting the labile flag bit saveCommandComments now you must call ReadCommandCommentOrToken() // Instead of setting the labile flag bit parentheticalToken, curlyBracketedToken, doubleQuotedToken now you must call ReadAllTokensUntil(char tokenBreaker) // Instead of setting the labile flag bit singleCharacterToken now you must call ReadTokenAsSingleCharacter() // tildeIsPunctuation, useSpecialPunctuation, preserveUnderscores, ignorePunctuation flags have been deprecated // NxsToken is now const-correct // If SUPPORT_NXS_TOKEN_LABILE_FLAGS is defined in nxsdefs.h, then NxsToken offers legacy support // to the old (slower, but more flexible) tokenizing that relies on GetNextToken and Labile flags // As with all deprecated functions, new code shouldn't use these functions as they may not be supported much longer the functions are: // GetNextToken() // SetSpecialPunctuationCharacter(char c); // SetLabileFlagBit(int bit); // // Deprecate functions: // Begins (this would be easy to bring back) IsAbbreviation is still available // BlanksToUnderscores // GetTokenAsCStr // IsPlusMinusToken // IsWhitespaceToken // StoppedOn(char ch) (instead of StoppedOn(x) you can call use x == PeekAtNextChar() ) // StripWhitespace // ToUpper() // Write(ostream &out); // Writeln(ostream &out); // Added GetNumComments, GetNextCommentIndex // Also added support for backing up the stream to allow for "reparsing" via Get/SeekTokenizerState // // /*-------------------------------------------------------------------------------------------------------------------------- | Exception thrown by NxsToken::GetComment */ class NxsX_InvalidCommentIndex{}; /*-------------------------------------------------------------------------------------------------------------------------- | Exception thrown by NxsToken::SeekTokenizerState */ class NxsX_InvalidTokenizerState{}; /*-------------------------------------------------------------------------------------------------------------------------- | 
This class stores the information about where the token is in its stream. | Note that the entire state of the token is not cached in this class, only the file postition fields. | To cache up in the token stream (so you can later back up) you need a NxsTokenizerState object. | */ class NxsTokenPosInfo { public : inline unsigned GetFileColumn() const; inline unsigned GetFileLine() const; inline file_pos GetFilePosition() const; private : inline NxsTokenPosInfo(file_pos p, unsigned l, unsigned c, char nextChar, bool a, bool eol); file_pos filepos; /* file position. the number of characters from the beginning of the stream */ unsigned fileline; /* the file line (number of newline characters since the beginning of the stream) */ unsigned filecol; /* the file column (number of characters since the last newline) */ bool atEOF; /* true if at the end of the file */ bool atEOL; /* true if at the end of a line */ char nextCharInStream; friend class NxsTokenizerState; friend class NxsToken; friend class NxsCommand; }; class NxsToken; /*-------------------------------------------------------------------------------------------------------------------------- | used to store NxsToken state (current token and it's place in the stream) */ class NxsTokenizerState { public : inline NxsTokenPosInfo GetPosInfo() const; private : NxsTokenizerState(const NxsToken & t); NxsTokenizerState(); NxsTokenPosInfo posInfo; std::string token; # if defined(NXS_INTERNAL_TOKEN_BUFFER) file_pos posBeforeLastRead; /* file position. 
the number of characters from the beginning of the stream */ unsigned posInLocalBuffer; unsigned nCharsInLocalBuffer; bool lastReadHitEnd; # endif friend class NxsToken; friend class NxsCommand; }; /*-------------------------------------------------------------------------------------------------------------------------- | Stores the text of a comment, where in the token it was found, and any whether it is an output/command comment */ class NxsComment { public : inline const std::string &GetCommentText() const; inline unsigned GetLocationInToken() const; inline bool IsCommandComment() const; inline bool MatchesModifier(char modChar) const; private : inline NxsComment(const std::string &s, unsigned pos, char comChar, const std::string &progSpecifier); std::string body; /* everything that was between the [] except for command codes */ unsigned placeInToken; /* location of the comment in the current token (number of characters from the beginning of the token) */ char modifierChar; /* the command comment character e.g. !, \ or & */ std::string intendedProgramCode; /* for && comments intended for only one program not implemented */ friend class NxsToken; }; /*-------------------------------------------------------------------------------------------------------------------------- | Class used for all input in NCL. | Takes a stream and breaks it into legal nexus tokens through calls to ReadToken() (the prefix ++ operator can also | be used to advance the token reader). | Access to the current token is through GetToken() or GetTokenReference() (the latter avoids copying the std::string that | holds the token and is preferable if a simple read-only check of the token is needed. WARNING: because it returns the | token reference any function call that changes the NxsToken state could update alter this reference). 
| SimpleNexus comments that are encountered are simply stored (and will be flushed when the token position is advanced | again) along with information about where in the current token they were encountered. For access to comments see | GetNumComments(), GetNextCommentIndex() and GetComment(). | Overriding OutputComment(const std::string &) const allows you to display (or ignore) any [! ]- style output comments | | In accordance with the Nexus standard: | _ encountered (in non-quoted strings) are converted to ' ' | comments are stored but do not affect tokenizing | Tokens are broken by whitespace or punctuation | Whitespace is any char < 7, ' ', tab or any newline. | Punctuation consist of any of the following ;()]{}/\,:=*\"`+-<>' | All punctuation characters except single-quotes are returned as individual tokens. | When a token begins with a single quote everything between the closing quote is treated as the token (the token will | not contain the surrounding quotes (Note adjacent '' within a single-quoted string are treated as one single-quote). | Thus 'test tok' will be read as one 8 character string and would be identical to test_tok. | All three OS newline designations are returned as the character '\n' | | Additional features: | To ease reading matrices of single characters use ReadSingleCharacter() instead of ReadToken(). | To return command comments as tokens use ReadCommandCommentOrToken() instead of ReadToken(). | To read and concatenate all tokens until some punctuation mark use ReadAllTokensUntil(char). For example to read a double- | quoted string, after seeing the opening " one could call ReadAllTokensUntil('\"') | AlterTokenReading() can be used to disobey Nexus Tokenizing Rules: Reading interleaved matrices requires recognizing | newline characters as tokens. Reading numbers is easier if hyphens are not treated as punctuation. These can | both be dealt with by calls to AlterTokenReading(). 
| End of files are only acceptable in certain places (in between blocks). Calling SetEOFAllowed(false) will result | in an NxsException exception being thrown if eof is found. This reduces the need to manually verify that you aren't at eof. | Fast reading of streams is implemented through unbuffered io. | Implementation of backing up in the stream would be inefficient (and rarely needed). In those instances in which | one might need to back up to a previous point, use GetTokenizerState() and then SeekTokenizerState(). | | For improved code readability/maintainability: | IsAbbreviation() and Equals() are provided to allow testing of the current token (these functions help you avoid code | like: IsCapAbbreviation(nxsToken.GetTokenReference(), "Test") | ThrowIfNot(std::string,bool) makes sure that the token equals the argument. If not an NxsException with an appropriate message | is thrown. | | Implementation notes: at first glance the file position info seems wrong. this is because the true file position is always one | character ahead and the nextCharInStream is stored. This make tokenizing much easier. | Several functions that are inlined appear too long to inline. 
Given how crucial speed is (all input occurs through NxsToken) | the code bloat seems worth ignoring the rules of thumb here (these functions are only calle by a couple of other functions */ class NxsToken NON_COPYABLE { public: STATELESS_FUNC VecString TokenizeString(const std::string &); NxsToken(std::istream & i); NxsToken(const std::string & s); # if defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS) enum NxsTokenFlags /* For use with the variable labileFlags */ { saveCommandComments = 0x0001, /* if set, command comments of the form [&X] are not ignored but are instead saved as regular tokens (without the square brackets, however) */ parentheticalToken = 0x0002, /* if set, and if next character encountered is a left parenthesis, token will include everything up to the matching right parenthesis */ curlyBracketedToken = 0x0004, /* if set, and if next character encountered is a left curly bracket, token will include everything up to the matching right curly bracket */ doubleQuotedToken = 0x0008, /* if set, grabs entire phrase surrounded by double quotes */ singleCharacterToken = 0x0010, /* if set, next non-whitespace character returned as token */ newlineIsToken = 0x0020, /* if set, newline character treated as a token and atEOL set if newline encountered */ tildeIsPunctuation = 0x0040, /* if set, tilde character treated as punctuation and returned as a separate token */ useSpecialPunctuation = 0x0080, /* if set, character specified by the data member special is treated as punctuation and returned as a separate token */ hyphenNotPunctuation = 0x0100, /* if set, the hyphen character is not treated as punctutation (it is normally returned as a separate token) */ preserveUnderscores = 0x0200, /* if set, underscore characters inside tokens are not converted to blank spaces (normally, all underscores are automatically converted to blanks) */ // The following two flags are NOT LEGAL arguments to SetLabileFlags // newlineIsNotToken = 0x0400, hyphenIsPunctuation = 0x0800 }; const 
std::string & GetNextToken(); void SetSpecialPunctuationCharacter(char c); void SetLabileFlagBit(int bit); std::string GetErrorMessage() const {return errormsg;} private: inline void OLDAppendToComment(char); void OLDReadComment(); void OLDReadParentheticalToken(); void OLDReadCurlyBracketedToken(); void OLDReadDoubleQuotedToken(); void OLDReadQuoted(); bool OLDIsPunctuation(char ch) const; std::string comment; char special; /* ad hoc punctuation character; default value is '\0' */ int labileFlags; /* storage for flags in the NxsTokenFlags enum */ char saved; std::string errormsg; # else //defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS) enum NxsTokenFlags /* arguments to AlterTokenReading used to change the standard tokenizing rules */ { kNewlineIsToken = 0x0001, /* causes newline characters to be treated like punctuation (useful in reading data matrices) */ kNewlineIsNotToken = 0x0002, /* causes newline to be treated as whitespace (this is the default state). Used to undo kNewlineIsToken */ kHyphenNotPunctuation = 0x0004, /* causes hyphen to not be punctuation character (useful in reading in numbers) */ kHyphenIsPunctuation = 0x0008 /* causes hyphen to be punctuation character (this is the default state). 
Used to undo kHyphenNotPunctuation*/ }; const std::string & GetNextToken() { return ReadToken(); } # endif //defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS) virtual ~NxsToken(); // Primary Services // const std::string & ReadToken(); void ReadSingleCharacter(); bool ReadCommandCommentOrToken(); void ReadAllTokensUntil(char tokenBreaker); // used to read the rest of double-quoted, parenthetical, or curly-bracketed token groups bool ReadDoubleToken(double *dbl); NxsToken &operator++(); //equivalent to ReadToken(); // Accessors // bool AtEOF() const; bool AtEOL() const; bool Equals(const std::string &s, bool must_respect_case = false) const; NxsComment GetComment(unsigned commentIndex) const; unsigned GetFileColumn() const; file_pos GetFilePosition() const; unsigned GetFileLine() const; unsigned GetNumComments() const; unsigned GetNextCommentIndex(char commandSpec = '\0',unsigned searchFrom = 0) const; std::string GetToken(bool must_respect_case = true) const; unsigned GetTokenLength() const; NxsTokenizerState GetTokenizerState() const; const std::string & GetTokenReference() const; bool IsAbbreviation(const std::string &s) const; bool IsPunctuationToken() const; bool IsWhitespace(char ch) const; char PeekAtNextChar() const; // Modifiers // void AlterTokenReading(NxsTokenFlags bit); void ReplaceToken(const std::string &s); void ResetToken(); void SeekTokenizerState(const NxsTokenizerState &tpi); bool SetEOFAllowed(bool b); //returns previous setting! 
void ThrowIfNot(const std::string &s, bool respect_case = false) const; void ThrowUnexpectedTokenNxsException(const std::string &) const; protected : // // void OutputComment(const std::string &msg) const; private : void AdvanceToNextCharInStream(); void AppendRestOfToken(char ch); void AppendToToken(char ch); bool CharIsATokenByItself(char ch) const; # if defined(NXS_INTERNAL_TOKEN_BUFFER) void FillInternalBuffer(); # endif void Initialize(); bool IsTokenBreaker(char) const; unsigned ReadComment(); char ReadFirstGraphChar(); char ReadFirstGraphCharOrCommandComment(); char ReadNextChar(); void ReadQuoted(); std::string ReadCommandCommentProgramIdentifier(); class InternalStringAndStream { public: std::string nString; # if defined(NCL_USING_SSTREAM) std::stringstream strStream; InternalStringAndStream(const std::string &strCopy) :nString(strCopy), strStream(nString.c_str()) {} # else std::istrstream strStream; InternalStringAndStream(const std::string &strCopy) :nString(strCopy), strStream(nString.c_str()) {} # endif }; class InternalPosInfo { public: # if defined(NXS_INTERNAL_TOKEN_BUFFER) InternalPosInfo(); mutable file_pos posBeforeLastRead; /* file position. the number of characters from the beginning of the stream */ unsigned posInLocalBuffer; unsigned nCharsInLocalBuffer; bool lastReadHitEnd; # else mutable file_pos filePos; /* file position. 
the number of characters from the beginning of the stream */ # endif unsigned fileLine; /* the file line (number of newline characters since the beginning of the stream) */ unsigned fileColumn; /* the file column (number of characters since the last newline) */ bool atEOF; /* true if at the end of the file */ bool atEOL; /* true if at the end of a line */ }; InternalStringAndStream *intStrStream; # if defined(NXS_INTERNAL_TOKEN_BUFFER) char * localBuffer; /* the input stream that is being tokenized */ # endif std::istream &inputStream; /* the input stream that is being tokenized */ char nextCharInStream; /* the character that broke the current token */ std::string token; /* the current token */ InternalPosInfo posInfo; /* current position in stream info. Note: posInfo refers the current character in the stream not the nextCharInStream */ std::vector comments; /* full information on all of the comments associated with the current token */ bool eofAllowed; /* checked when eof is encountered. if false, an NxsX_UnexpectedEOF exception is thrown. Can be changed with SetEOFAllowed(bool) */ char currentSingleTokenStr[20]; /* list of characters that are single token characters (current punctuation) altered as necessary by AlterTokenReading() */ char currentTokenBreakerStr[23]; /* list of characters that are token breakers (current punctuation and whitespace) altered as necessary by AlterTokenReading() */ char newLineCheck; /* used to avoid extra slim down the IsWhitespace function. If kNewlineIsToken, this char is set to ' ', so IsWhitespace('\n') won't return true */ bool tokenStateHyphenIsPunc; /* Do NOT change this - use it to read the state ONLY (Use AlterTokenReading to change behaviour of the tokenizer) reports part of tokenizing state. 
*/ friend class NxsTokenizerState; }; #if defined (NCL_SUPPORT_OLD_NAMES) typedef NxsToken NexusToken; #endif /*-------------------------------------------------------------------------------------------------------------------------- | Returns true if ch is any nexus punctuation mark except [ ' | [ and ' never get returned as punctuation, | if kHyphenNotPunctuation the Caller must check to make sure the char isn't a - | */ inline bool IsAlwaysSingleTokenPunctChar(char ch) { return (std::strchr("(){}\"-]/\\,;:=*`+<>", ch) != NULL); } /*-------------------------------------------------------------------------------------------------------------------------- | calls ReadToken(). Only prefix ++ is defined. */ inline NxsToken &NxsToken::operator++() { ReadToken(); return *this; } /*-------------------------------------------------------------------------------------------------------------------------- | Returns the number of comments read between the end of the last token and the end of the current token. | Comments don't break tokens so a single token could contain several comments. See GetNextCommentIndex() */ inline unsigned NxsToken::GetNumComments() const { return (unsigned)comments.size(); } /*-------------------------------------------------------------------------------------------------------------------------- | Returns true if the current token has all of the letters in the capitalized prefix of s and doesn't disagree with other | characters in s. | Written to avoid proliferation of ugly code like : IsCapAbbreviation(nxsToken.GetTokenReference(), s) */ inline bool NxsToken::IsAbbreviation(const std::string &s ) const { return IsCapAbbreviation(token, s); } /*-------------------------------------------------------------------------------------------------------------------------- | Returns true if the input stream is at its end. 
*/ inline bool NxsToken::AtEOF() const { return posInfo.atEOF; } /*-------------------------------------------------------------------------------------------------------------------------- | Returns true if the current character (the last character in the current token) is an end of line */ inline bool NxsToken::AtEOL() const { return posInfo.atEOL; } /*-------------------------------------------------------------------------------------------------------------------------- | returns true if the current token equals s. | Written to avoid proliferation of ugly code like : nxsToken.GetTokenReference().Equal(s, ncl::kStringNoRespectCase) */ inline bool NxsToken::Equals( const std::string &s, bool must_respect_case ) const /* true to make the comparison case sensitive */ { return StrEquals(token, s, (must_respect_case ? ncl::kStringRespectCase : ncl::kStringNoRespectCase) ); } /*-------------------------------------------------------------------------------------------------------------------------- | Returns the column number (number of characters since last newline) */ inline unsigned NxsToken::GetFileColumn() const { return posInfo.fileColumn; } /*-------------------------------------------------------------------------------------------------------------------------- | returns number of characters since the beginning of the file */ inline file_pos NxsToken::GetFilePosition() const { # if defined(NXS_INTERNAL_TOKEN_BUFFER) return file_pos(posInfo.posBeforeLastRead + file_pos(posInfo.posInLocalBuffer)); # else posInfo.filePos = inputStream.rdbuf()->pubseekoff(0,std::ios::cur, std::ios::in); return posInfo.filePos; # endif } /*-------------------------------------------------------------------------------------------------------------------------- | return the line number (number of newlines since the beginning of the file */ inline unsigned NxsToken::GetFileLine() const { return posInfo.fileLine; } 
/*-------------------------------------------------------------------------------------------------------------------------- | return the state of the NxsToken object. Used if the stream might need to be backed up to this point. In which case | SeekTokenizerState is called with the stored NxsTokenizerState object as an argument. */ inline NxsTokenizerState NxsToken::GetTokenizerState() const { return NxsTokenizerState(*this); } /*-------------------------------------------------------------------------------------------------------------------------- | Returns the next character in the stream. This is very fast because that character is always cached in the process of | reading tokens so this doesn't involve a call to peek(). */ inline char NxsToken::PeekAtNextChar() const { return nextCharInStream; } /*-------------------------------------------------------------------------------------------------------------------------- | Returns true if the current token is length one and is a punctuation char (){}\"-]/\\,;:=*`+<> */ inline bool NxsToken::IsPunctuationToken() const { return (GetTokenLength() == 1 && IsAlwaysSingleTokenPunctChar( token.at(0) ) ); } /*-------------------------------------------------------------------------------------------------------------------------- | Clears the current token and all stored comments */ inline void NxsToken::ResetToken() { token.clear(); comments.clear(); } /*-------------------------------------------------------------------------------------------------------------------------- | Used to control whether or not an exception is thrown when eof is encountered. To throw exceptions (e.g. when entering | a block) call SetEOFAllowed(false). then when returning to a portion of the file between blocks call SetEOFAllowed(true) | eof are allowed by default. 
*/ inline bool NxsToken::SetEOFAllowed( bool b) { bool prev = eofAllowed; eofAllowed = b; return prev; } /*-------------------------------------------------------------------------------------------------------------------------- | returns true if ch is a command comment symbol or output comment symbol currently only recognizes ! \ and & */ inline bool IsCommentModifier(char ch) //@need to add the other command comment ids { return (std::strchr("!\\&", ch) != NULL); } /*-------------------------------------------------------------------------------------------------------------------------- | returns true if ch is a single character token (punctuation other than single quote) (){}\"]/\\,;:=*`+<> always return | true. - might return true (it does by default, but AlterTokenReading allows this to be changed). | newline might return true (if AlterTokenReading(kNewlineIsToken) has been called) */ inline bool NxsToken::CharIsATokenByItself(char ch) const { return (std::strchr(currentSingleTokenStr, ch) != NULL); } /*-------------------------------------------------------------------------------------------------------------------------- | Returns true if ch is any nexus punctuation mark or whitespace. | Whether or not hyphens are punctuation can be changed by AlterTokenReading() */ inline bool NxsToken::IsTokenBreaker(char ch) const { return (std::strchr(currentTokenBreakerStr, ch) != NULL); } /*-------------------------------------------------------------------------------------------------------------------------- | Reads the next character and then stops. To be used when character by character input is easier to handle. | NOTE dangerous skips whitespace and comments, but this does not follow all nexus token rules. | A single quote can be returned as a token. single quotes will generate errors in the calling function! 
*/ inline void NxsToken::ReadSingleCharacter() { token = ReadFirstGraphChar(); } /*-------------------------------------------------------------------------------------------------------------------------- | Returns true if the character is a space, tab, newline or < 7. | Whether or not newlines are whitespace can be changed by AlterTokenReading() */ inline bool NxsToken::IsWhitespace(char ch) const { return (ch == ' ' || (ch == newLineCheck) || ch == '\t' || ch < 7); } /*-------------------------------------------------------------------------------------------------------------------------- | */ inline NxsTokenPosInfo::NxsTokenPosInfo( file_pos p, /* current file position */ unsigned l, /* current line number */ unsigned c, /* current column number */ char nextC, bool ateof, /* true if at the EOF */ bool ateol) /* true if at the end of a line */ : filepos(p), fileline(l), filecol(c), atEOF(ateof), atEOL(ateol), nextCharInStream(nextC) { }; /*-------------------------------------------------------------------------------------------------------------------------- | returns the position in file info (NxsTokenPosInfo object) */ inline NxsTokenPosInfo NxsTokenizerState::GetPosInfo() const { return posInfo; } /*-------------------------------------------------------------------------------------------------------------------------- | Returns the column number (number of characters since last newline) */ inline unsigned NxsTokenPosInfo::GetFileColumn() const { return filecol; } /*-------------------------------------------------------------------------------------------------------------------------- | returns number of characters since the beginning of the file */ inline file_pos NxsTokenPosInfo::GetFilePosition() const { return filepos; } /*-------------------------------------------------------------------------------------------------------------------------- | return the line number (number of newlines since the beginning of the file */ inline unsigned 
NxsTokenPosInfo::GetFileLine() const { return fileline; } /*-------------------------------------------------------------------------------------------------------------------------- | returns the comment's location (number of characters from the beginning of the current token) */ inline unsigned NxsComment::GetLocationInToken() const { return placeInToken; } /*-------------------------------------------------------------------------------------------------------------------------- | returns the comment, whatever was between [] (excluding the first character if it was a comment modifier). */ inline const std::string &NxsComment::GetCommentText() const { return body; } /*-------------------------------------------------------------------------------------------------------------------------- | returns the comment, whatever was between [] (excluding the first character if it was a comment modifier). */ inline NxsComment::NxsComment( const std::string &s, unsigned pos, char comChar, const std::string &progSpecifier) : body(s), /* the text of the comment */ placeInToken(pos), /* the number of characters in the current token before the beginning of the comment was reached */ modifierChar(comChar), /* command/output comment specifier that applies to this commend, send '\0' if not applicable */ intendedProgramCode(progSpecifier) /* program code (for use with command comments that are directed to only one program (used with [&&ProgramName ...] comments */ { } /*-------------------------------------------------------------------------------------------------------------------------- | returns true if there is a modifier character and it isn't ! (the printing modifier). */ inline bool NxsComment::IsCommandComment() const { return ((modifierChar != '\\') && (modifierChar != '&')); } /*-------------------------------------------------------------------------------------------------------------------------- | returns true if comments modifier character (e.g. ! 
for output comment or & for some command comments) matches the | argument. */ inline bool NxsComment::MatchesModifier( char modChar) const { return (modifierChar == modChar); } /*-------------------------------------------------------------------------------------------------------------------------- | Function to improve code readability. | Throws an exception if the current token is not equal to the first argument. */ inline void NxsToken::ThrowIfNot( const std::string &s, /* string that should be identical to the current token. */ bool respect_case ) const /*true if the compare should be case sensitive */ { if (!Equals(s, respect_case)) ThrowUnexpectedTokenNxsException(s); } #if defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS) /*---------------------------------------------------------------------------------------------------------------------- | Sets the bit specified in the variable `labileFlags'. The available bits are specified in the NxsTokenFlags enum. | All bits in `labileFlags' are cleared after each token is read. */ inline void NxsToken::SetLabileFlagBit( int bit) /* the bit (see NxsTokenFlags enum) to set in `labileFlags' */ { labileFlags |= bit; if (bit == newlineIsToken || bit == hyphenNotPunctuation) AlterTokenReading((NxsTokenFlags) bit); } /*---------------------------------------------------------------------------------------------------------------------- | Returns true if character supplied is considered a punctuation character. 
The following twenty characters are | considered punctuation characters: |> | ()[]{}/\,;:=*'"`+-<> |> | Exceptions: |~ | o The tilde character ('~') is also considered punctuation if the tildeIsPunctuation labile flag is set | o The special punctuation character (specified using the SetSpecialPunctuationCharacter) is also considered | punctuation if the useSpecialPunctuation labile flag is set | o The hyphen (i.e., minus sign) character ('-') is not considered punctuation if the hyphenNotPunctuation | labile flag is set |~ | Use the SetLabileFlagBit method to set one or more NxsLabileFlags flags in `labileFlags' */ inline bool NxsToken::OLDIsPunctuation( char ch) const /* the character in question */ { // PAUP 4.0b10 // o allows ]`<> inside taxon names // o allows `<> inside taxset names // bool is_punctuation = false; if (IsTokenBreaker(ch)) return true; if (labileFlags & tildeIsPunctuation && ch == '~') is_punctuation = true; if (labileFlags & useSpecialPunctuation && ch == special) is_punctuation = true; if (labileFlags & hyphenNotPunctuation && ch == '-') is_punctuation = false; return is_punctuation; } /*---------------------------------------------------------------------------------------------------------------------- | Adds `ch' to end of comment std::string. */ inline void NxsToken::OLDAppendToComment( char ch) /* character to be appended to comment */ { comment += ch; @@@-POL-@@@ convert to << operator } # endif // defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS) /*-------------------------------------------------------------------------------------------------------------------------- | Appends the character ch to the current token. */ inline void NxsToken::AppendToToken( char ch ) { token << ch; } /*-------------------------------------------------------------------------------------------------------------------------- | Returns a copy ofthe token as a std::string. 
*/ inline std::string NxsToken::GetToken( bool must_respect_case) const { return (!must_respect_case ? GetCapitalized(token) : token); } /*-------------------------------------------------------------------------------------------------------------------------- | Returns the size of the current token. */ inline unsigned NxsToken::GetTokenLength() const { return (unsigned) token.size(); } /*---------------------------------------------------------------------------------------------------------------------- | Returns the token for functions that only need read only access - faster than GetToken. */ inline const std::string &NxsToken::GetTokenReference() const { return token; } /*---------------------------------------------------------------------------------------------------------------------- | Replaces current token std::string with s. */ inline void NxsToken::ReplaceToken( const std::string &s) /* std::string to replace current token std::string */ { token = s; } #if OLD_NXS_TOKEN || defined (SUPPORT_NXS_TOKEN_LABILE_FLAGS) /*---------------------------------------------------------------------------------------------------------------------- | Sets the special punctuation character to `c'. If the labile bit useSpecialPunctuation is set, this character will | be added to the standard list of punctuation symbols, and will be returned as a separate token like the other | punctuation characters. */ inline void NxsToken::SetSpecialPunctuationCharacter( char c) /* the character to which `special' is set */ { special = c; } #endif #endif