// Hennig86Reader.hpp #if !defined(HENNIG86_READER_HPP) #define HENNIG86_READER_HPP #include #include #include #include #include #include #include #include #include "NexusWriter.hpp" namespace Cipres { namespace SequenceFormats { class Hennig86Reader { public: Hennig86Reader() throw() { } void Read(std::istream &input_stream, NexusWriter &writer) throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); private: class Lexer { public: enum TokenType { STRING, INTEGER, COMMA, PERIOD, AMPERSAND, SEMICOLON, UNDERSCORE, PLUS_SIGN, MINUS_SIGN, EQUALS_SIGN, SINGLE_QUOTE, FORWARD_SLASH, QUESTION_MARK, OPEN_BRACKET, CLOSE_BRACKET, OPEN_BRACE, CLOSE_BRACE, OPEN_PARENS, CLOSE_PARENS, OTHER_NON_ALNUM, END_OF_FILE }; struct Token { bool prev_space; size_t line_num; size_t col_num; unsigned int type; std::string value; }; Lexer(std::istream &input_stream) throw(std::ios_base::failure) : m_line_num(1), m_col_num(0), m_input_stream(input_stream) { input_stream.exceptions(std::ios_base::badbit); ReadNextChar(); } unsigned int GetToken(Token &token) throw(std::bad_alloc, std::ios_base::failure); bool GetBlock(std::string &block, char delim) throw(std::bad_alloc, std::ios_base::failure); bool SkipBlock(char delim) throw(std::ios_base::failure); private: void ReadNextChar() throw(std::ios_base::failure); char m_input; size_t m_line_num; size_t m_col_num; std::istream &m_input_stream; }; class Parser { public: Parser(std::istream &input_stream, NexusWriter &writer) throw() : m_default_data_type(NUMERIC), m_max_char_states(10), m_num_chars(0), m_num_taxa(0), m_lexer(input_stream), m_writer(writer) { } void Parse() throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); private: enum CharacterDataType { NUMERIC, DNA, PROTEIN, CONTINUOUS }; class SyntaxError : public std::runtime_error { public: SyntaxError(Lexer::Token &token) throw(); ~SyntaxError() throw() { } // what const char *what() const throw() { if(!m_error_message.empty()) return m_error_message.c_str(); else return runtime_error::what(); } private: std::string m_error_message; }; unsigned int GetToken() throw(std::bad_alloc, std::ios_base::failure) { m_token.value.clear(); m_lexer.GetToken(m_token); return m_token.type; } int ToInteger(const std::string &value) throw(std::runtime_error) { int result; std::istringstream converter(value); if(!(converter >> result)) throw std::runtime_error("not an integer"); return result; } float ToFloat(const std::string &value) throw(std::runtime_error) { float result; std::istringstream converter(value); if(!(converter >> result)) throw std::runtime_error("not a floating point number"); return result; } int SymbolToState(char symbol) throw() { static const char states[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; return states[static_cast(symbol)]; } char StateToSymbol(unsigned char state) throw() { static const char symbols[32] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U' ,'V' }; assert(state < sizeof(symbols) / sizeof(char)); return symbols[state]; } bool MatchesKeyword(const char *keyword, std::string &value) throw(); void ParsedIgnoredCommmand() throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseCnamesCommand() throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseCommentsCommand() throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseNstatesCommand() throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseXreadCommand() throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseInterleavedData() throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseDnaDataBlock(bool interleaved) throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseProteinDataBlock(bool interleaved) throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseNumDataBlock(bool interleaved) throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseAlphaNumDataBlock(bool interleaved) throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseContinuousDataBlock(bool interleaved) throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); void ParseTaxonName(std::string &name) throw(std::bad_alloc, std::runtime_error, std::ios_base::failure); unsigned int m_default_data_type; int m_max_char_states; int m_num_chars; int m_num_taxa; Lexer m_lexer; Lexer::Token m_token; NexusWriter &m_writer; }; }; } // namespace SequenceFormats } // namespace Cipres #endif // HENNIG86_READER_HPP