// This file is part of BULL, a program for phylogenetic simulations // most of the code was written by Mark T. Holder. // This program is for internal use by the lab of Dr. Tandy Warnow only. // Do not redistribute the code. It is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // Some of the code is from publically available source by Paul Lewis, Ziheng Yang, // John Huelsenbeck, David Swofford , and others (as noted in the code). // In fact the main structure of the program was created by modifying Paul Lewis' // basiccmdline.cpp from his NCL // // This code was used in Mark's dissertation, some changes were made in order to // get it to compile on gcc. It is possible that this porting introduced bugs (very // little debugging has been done on UNIX platforms). I would suggest checking // the simulator by generating data on trees with short branches, etc. #include "bull.hpp" #include "bull_kernel.hpp" #include "string_extensions.hpp" #include "xbull.hpp" #include "set_reader.hpp" const double bull::kDefStartEdgeLen = 0.1; const double bull::kSmallDbl = 1.0e-10; const double bull::kMaxDouble = 1.0e303; using namespace std; using namespace bull; Bull::Bull() :trees_block(NULL), taxa_block(NULL), kernel(NULL) { id = "BULL"; FactoryDefaults(); } Bull::~Bull() { if (kernel) delete kernel; deleteNexusBlocks(); } void Bull::FactoryDefaults() { logEachStep = true;//TEMPORARY inf_open = false; logf_open = false; quit_now = false; storeBrLensFromFile = true; if (logf.good()) logf.close(); next_command[0] = '\0'; criterion = Criterion(MAX_LIKE); this->ResetBlocks(); this->deleteModels(); this->deleteTrees(); workingOnBranches = false; nParamDirectionsToSkip = 0; nParamGroupsToSkip = 0; nBranchesToSkip = 0; lastParamImprov = kMaxDouble; lastBranchImprov = kMaxDouble; simSetting = SimSettings(); analysisMode = inference; numSimulationConditionsDone = 0; } /** * @method ResetBlocks [void:protected] * * Detaches all blocks, deletes them, creates new blocks, and * finally adds the new blocks. */ void Bull::deleteNexusBlocks() { if (taxa_block != NULL) { Detach(taxa_block); delete taxa_block; taxa_block = NULL; } if (trees_block != NULL) { Detach(trees_block); delete trees_block; trees_block = NULL; } } /** * @method ResetBlocks [void:protected] * * Detaches all blocks, deletes them, creates new blocks, and * finally adds the new blocks. */ void Bull::ResetBlocks() { DeleteNexusBlocks(); taxa_block = new TaxaBlock(); Add(taxa_block); trees_block = new TreesBlock(*taxa_block); Add(trees_block); } /** * @method FinishTaxaBlock [void :protected] * * Interacts with the NCL classes to get all of the number of taxa * (should probably get taxa names here too) */ void Bull::FinishTaxaBlock() { ntaxa=taxa->GetNumTaxonLabels(); if (ntaxa > 1) cout<GetNumTrees(); for (int i=0;i < ntrees;i++){ Tree *temptree=new Tree(trees->GetTranslatedTreeDescription(i)); if (!temptree->IsGood()) throw MTHException("Bad Tree"); std::string s=trees->GetTreeName(i); temptree->SetName(s); this->tree_list.push_back(temptree); } if (ntrees > 1) cout<GetRecursiveNodeList(); while (*temp) {if((*temp)->IsTerm()) (*temp)->ReplaceSetOfLikeInfoButKeepBranchLen(CreateSetOfSimAttrForANode()); else (*temp)->ReplaceSetOfLikeInfoButKeepBranchLen(CreateSetOfSimAttrForANode()); temp++; } t->SetSimAttributes(CreateSetOfSimAttrForATree()); t->GetSimAttributes()->InitializeModelMixingParams(); # ifdef ELIMINATE_ALL_ZEROS //CALLING Bull::EliminateAminoAcidsThatHaveFreqZero() won't work SSRFCodonSubMod *tempM; for (unsigned p=0; p < SSettings->GetNDataPartitions(); ++p) { for (unsigned m=0; m < SSettings->GetNModelsInPart(p); ++m) { tempM=(SSRFCodonSubMod *) t->GetSimAttributes()->GetModel(p,m); tempM->ResizeModel(); t->GetSimAttributes()->GetSimAtt(p,m)->nStates=tempM->GetNStates(); } } # endif } /** * @method CreateSetOfSimAttrForANode [SetOfLikeAttr *:protected] * * DANGEROUS way to give an node its SimAttr and a setofLikeAttr for * the simulations. bool * NOTE that SimAttr are derived from LikeAttr, and this returns a * MEMORY ALLOCATION for like attr and setOfLikeAttr. Deletion occurs in * Node::ReplaceSetOfLikeInfoButKeepBranchLen( SetOfLikeAttr *) which is called by Bull::GetTreeReadyToSimulate() * with CreateSetOfSimAttrForANode as an argument */ SetOfLikeAttr *Bull::CreateSetOfSimAttrForANode() { //For simulations all nodes get a Terminal Like attr (because the characters will be stored as shorts //if memory is an issue the internal nodes could share memory assert(SSettings); SetOfLikeAttr *temp; temp=new SetOfLikeAttr(SSettings->GetNDataPartitions()); Model *tempMod; for (int i=0;i < SSettings->GetNDataPartitions();i++) {int nmods=SSettings->GetNModelsInPart(i); temp->SetNumModsInPart(i,nmods); for (int j=0;j < nmods;j++) {tempMod=SSettings->GetModel(i,j); unsigned len = SSettings->getNumConcats() * SSettings->getNumSimsAtOnce() *SSettings->GetPartitionLength(i); SimNodeLikeAttr * snla = new SimNodeLikeAttr(len, tempMod); temp->AddLikeAtt(i, j, snla); } } return temp; } /** * @method CreateSetOfSimAttrForANode [SetOfTreeSimAttr *:protected] * * klutzy way to give an tree its SimAttr and a setofLikeAttr for * the simulations. * MEMORY ALLOCATION for like attr and setOfLikeAttr. Deletion occurs in * Tree::SetSimAttributes(SetOfTreeSimAttr *) which is called by Bull::GetTreeReadyToSimulate() * with CreateSetOfSimAttrForATree as an argument */ SetOfTreeSimAttr *Bull::CreateSetOfSimAttrForATree(void) { SetOfTreeSimAttr *temp; temp=new SetOfTreeSimAttr(SSettings->GetNDataPartitions()); for (int i=0;i < SSettings->GetNDataPartitions();i++) {int nmods=SSettings->GetNModelsInPart(i); temp->SetNumModsInPart(i,nmods); if (nmods > 1) temp->SetModelMixingParam(i,SSettings->GetModelMixingParam(i)); const unsigned singleLen = SSettings->getNumConcats() * SSettings->GetPartitionLength(i); for (int j=0;j < nmods;j++) { TreeSimAttr *tempPtr; const unsigned len = singleLen * SSettings->getNumSimsAtOnce(); tempPtr=new TreeSimAttr(len, SSettings->GetModel(i,j)); temp->AddAtt(i,j,tempPtr); #ifdef ONESETOFBRANCHES if (i == 0 && j == 0) tempPtr->SetOwnsBrLen(true); else tempPtr->SetOwnsBrLen(false); #else MTHException("Unwritten code in CreateSetOfSimAttrForATree()"); #endif } temp->SetNCharInEachDataSet(i, singleLen); } temp->maxReps=SSettings->getNumSimsAtOnce(); return temp; } void Bull::WriteSimulations( int num, Tree *t, std::string outpfn, std::string ,//paupblockfn, int outputEncodingType, int totRepNum, long seedAtBeg, int simCondNum, bool /* autoPaup=false*/) { assert(SSettings); ofstream outpf; bool startingNewFile=false; if (!totRepNum && !(SSettings->getAppendToOut())) {outpf.open(outpfn.c_str()); startingNewFile=true; } else outpf.open(outpfn.c_str(),ios::app); SetOfTreeSimAttr *tssa=t->GetSimAttributes(); int totntax=t->GetNTax(); int nparts=tssa->GetNParts(); int totNCharPerDataSet=0; int *ncharsInPart; ncharsInPart=new int[nparts]; //assumes that all characters will be stored in the simattributes as character state ordination codings in a short bool allsamemodelencoding=true; int theModelEncodingType; theModelEncodingType=tssa->GetModel(0,0)->GetEncodingType(); for (int i=1;i < nparts;i++) if (tssa->GetModel(i,0)->GetEncodingType()!=theModelEncodingType) {allsamemodelencoding=false; delete []ncharsInPart; throw MTHException("Right now only one type of model encoding at a time can be used to simulate data"); } double modelToOutputCharRatio=NumDNACharactersPerCharacter(theModelEncodingType)*NumColumnsPerCharacter(outputEncodingType); modelToOutputCharRatio/=NumDNACharactersPerCharacter(outputEncodingType); for (int i=0;i < nparts;i++) { ncharsInPart[i]=tssa->GetNCharInEachDataSet(i); //ncharsInPart[i]*=NumDNACharactersPerCharacter(theModelEncodingType); //ncharsInPart[i]/=NumDNACharactersPerCharacter(outputEncodingType); //ncharsInPart[i]*=NumColumnsPerCharacter(outputEncodingType); totNCharPerDataSet+=(int)(((double) ncharsInPart[i])*modelToOutputCharRatio); } int maxlen=t->GetMaxTaxonLabelLength(true)+4; char **OrdToNexTrans; OrdToNexTrans=GetOrdinationToNexusTranslator(theModelEncodingType,outputEncodingType); if (startingNewFile) { outpf<<"#NEXUS\n"; } if (!totRepNum) {outpf<<"[!###SIMCONDITIONS="<Print(outpf,true,false,false); outpf<<"]\n"; outpf<<"BEGIN PAUP;\n\texecute preDataPaup.nex;\nEND;\n"; } for (int ds=0;ds < num;ds++) {if(!ds) outpf<<"[!Seed At Beginning Of This batch of simulations =" <GetNTax();tax++) {Node *tempno=t->GetTerminalNode(tax); std::string tn=tempno->GetName(); if (ds) outpf<SetSimAtt(parti,0);//TEMPORARY only works for one model per partition simulations short *cis=tempno->simInfo->CharsInShorts+ds*citp; int *modelTrans=((SSRFCodonSubMod *) tempno->simInfo->GetModel())->GetCurrentLocToGlob(); for (int ch=0;ch < citp;ch++) outpf< "; cin.getline( next_command, 256 ); if cin.eof() return; PreprocessNextCommand(); HandleNextCommand(); } } catch(bad_alloc ba) { cout<<"There is not enough memory to execute that command, Bull will be crashing now."<< std::endl; raise; } } /** * @method Read [void:protected] * @param token [NexusToken&] the token used to read from in * @throws XBull * * This function provides the ability to read everything following * the block name (which is read by the Nexus object) to the end or * endblock statement. Characters are read from the input stream * in. Overrides the pure virtual function in the base class. */ void Bull::Read( NexusToken& token ) { isEmpty = false; // this should be the semicolon after the block name // token.GetNextToken(); if ( !token.Equals(";") ) { errormsg = "Expecting ';' after "; errormsg += id; errormsg += " block name, but found "; errormsg += token.GetToken(); errormsg += " instead"; throw XBull(errormsg, token); } //Get Info from other blocks if not already done if (purged && !taxa->IsEmpty()) {purged=false; FinishTaxaBlock(); if (!trees->IsEmpty()) FinishTreesBlock(); } for (;;) {token.GetNextToken(); if ( token.Abbreviation("ENdblock") ) {HandleEndblock( token ); break; } else if ( token.Abbreviation("Quit") ) {quit_now = true; message = "\nBull says goodbye\n"; PrintMessage(); break; } TryAdvancedCommands(token); } } void Bull::TryAdvancedCommands(NexusToken &token) { //everything but Endblock and quit if (token.Abbreviation("CODLIKESTartval") || token.Abbreviation("CLIKESTartval") || token.Abbreviation("CLSTartval")) HandleCodLikeStartVal(token ); else if ( token.Abbreviation("EXecute") ) HandleExecute( token ); else if ( token.Abbreviation("EXECUTEBull") || token.Abbreviation("EXBull") ) HandleExecuteBullBlocksInFile(token); else if ( token.Abbreviation("Gettrees")) HandleGetTrees( token ); else if ( token.Abbreviation("LOg") ) HandleLog( token ); else if ( token.Abbreviation("SImulate") ) HandleSimulate( token ); else {SkippingCommand( token.GetToken() ); do { token.GetNextToken(); } while ( !token.AtEOF() && !token.Equals(";") ); if ( token.AtEOF() ) { errormsg = "Unexpected end of file encountered"; throw XBull(errormsg, token); } } } /** * @method PreprocessNextCommand [void:public] * * Begins with the command just entered by the user, which is stored in * the data member next_command, adds a semicolon (if the user failed * to supply one), and then adds "end;" so the whole bundle looks * like a very short Bull block. This is then passed to HandleNextCommand, * which processes it just like a real Bull block in a NEXUS data file. */ void Bull::PreprocessNextCommand() { // If user failed to add the terminating semicolon, // we'll do it now. We will also remove the line feed // at the end and add the command "end;" to the end // of the line (see explanation below). // int len = strlen(next_command); assert( len > 0 ); // Remove any whitespace characters from end of string entered by user // int i = len; while ( i > 0 && next_command[i-1] == ' ' || next_command[i-1] == '\t' || next_command[i-1] == '\n' ) i--; // If character at position i-1 is a semicolon, put '\0' terminator at position i; // otherwise, put a semicolon at position i and terminator at i+1 // if ( next_command[i-1] != ';' ) { next_command[i] = ';'; i++; } assert( i <= COMMAND_MAXLEN ); next_command[i] = '\0'; // Now add a semicolon at the beginning and terminate with an "END;" command // so that we can pretend this is simply a very short private NEXUS block // containing only one command. This allows us to simply use the Read // function we inherited from the base class BstBase to process the command. // len = strlen(next_command); assert( len < COMMAND_MAXLEN-2 ); std::string tmp = ";"; tmp += next_command; tmp += "end;"; strcpy( next_command, tmp.c_str() ); } /** * @method DestroySimulationAttributes [void :protected] * @param t [Tree *] the tree owning the sim attributes * * This frees the memory taken by a tree's sim attributes * after the simulation is done. It is called by HandleSimulate */ void Bull::DestroySimulationAttributes(Tree *t) { RecursiveNodeList=t->GetRecursiveNodeList();//probably unnecessary (already done) Node **temp; SetOfTreeSimAttr *treesSetofSim=t->GetSimAttributes(); PartModIndex potOwner; for (int p=0;p < treesSetofSim->GetNParts();p++) for (int m=0;m < treesSetofSim->GetNModels(p);m++) {potOwner.Set(p,m); if (!(treesSetofSim->GetOwnsBrLengths(potOwner))) {temp=RecursiveNodeList; while (*temp) {(*temp)->SetSimAtt(potOwner.partNum,potOwner.modNum); (*temp)->simInfo->SetBLenPtr(NULL); temp++; } } } temp=RecursiveNodeList=t->GetRecursiveNodeList(); while (*temp) {if((*temp)->IsTerm()) (*temp)->ReplaceSetOfLikeInfoButKeepBranchLen(NULL);//Destroys old SimAttr for the nodes (including brLens) else (*temp)->ReplaceSetOfLikeInfoButKeepBranchLen(NULL);//Destroys old SimAttr for the nodes (including brLens) temp++; } t->SetSimAttributes(NULL);//Destroys old SimAttr for the tree (including brLen Modifiers) } /** * @method NexusError [virtual void:public] * @param msg [std::string&] the error message * @param pos [streampos] the point in the NEXUS file where the error occurred * @param line [long] the line in the NEXUS file where the error occurred * @param col [long] the column in the NEXUS file where the error occurred * * Called when an error is encountered in a NEXUS file. Allows program to * give user details of the error as well as the precise location of the * error. Virtual function that overrides the pure virtual function in the * base class Nexus. */ void Bull::NexusError( const string& msg, std::istream::pos_type /* pos */, long line, long col) { message = "\n"; message.append(msg); PrintMessage(); if ( inf_open ) { message = "Line: "; message += line; PrintMessage(); message = "Column: "; message += col; PrintMessage(); } } /** * @method Report [virtual void:public] * @param out [ostream&] the output stream to which to write the report * * This function outputs a brief report of the contents of this Bull block. * Overrides the pure virtual function in the base class. */ void Bull::Report( ostream& /* out */ ) { message = ""; PrintMessage(); message = id; message += " block contains..."; PrintMessage(); } /** * @method Reset [void:protected] * * Overrides the pure virtual function in the base class. */ void Bull::Reset() { isEmpty = true; inf_open = false; quit_now = false; message = ""; next_command[0] = '\0'; } /** * @method FindTreeFromName [Tree *:protected] * @param s [std::string] the name of the requested tree * * searches through the vector of Tree * for the requested tree * not case sensitive because Tree names are capitalized * NOTE it doesn't check to see if there is more than one Tree in the * list with the correct name, it just returns the first. */ Tree* Bull::FindTreeFromName(std::string s) { ToUpper(s); for (std::size_t i=0; i < treelist.size(); ++i) { if (s == treelist[i]->GetName()) return treelist[i]; } throw NoSuchTree(s.c_str()); } /** * @method SkippingBlock [virtual void:public] * @param blockName [std::string] the unrecognized block name * * Called when program does not recognize a block name encountered in a * NEXUS file. Virtual function that overrides the pure virtual function * in the base class Nexus. */ void Bull::SkippingBlock(const std::string & blockName) const { message = "Skipping unknown block ("; message += blockName; message += ")"; PrintMessage(); } /** * @method SkippingCommand [virtual void:public] * @param commandName [std::string] the name of the command being skipped * * This function is called when an unknown command named commandName is * about to be skipped. This version of the function (which is identical * to the base class version) does nothing (i.e., no warning is issued * that a command was unrecognized). Modify this virtual function to * provide such warnings to the user (or eliminate it altogether since * the base class version already does what this does). */ void Bull::SkippingCommand( std::string commandName ) { message = "Skipping unknown command ("; message += commandName; message += ")"; PrintMessage(); } /** * @method EnteringBlock [virtual void:public] * @param blockName [std::string] the name of the block just entered * * Called by the Nexus object when a block named blockName is entered. * Allows program to notify user of progress in parsing the NEXUS file. * Virtual function that overrides the pure virtual function in the * base class Nexus. */ void Bull::EnteringBlock( std::string blockName ) { message = "Reading "; message += blockName; message += " block..."; PrintMessage(); } /** * @method FileExists [bool:protected] * @param fn [const char*] the name of the file to check * * Returns true if file named fn already exists, * false otherwise. */ bool Bull::FileExists( const char* fn ) { ifstream testst; testst.open(fn); if (testst.good()) {testst.close(); return true; } testst.close(); return false; } /** * @method GetFileName [std::string:protected] * @param token [NexusToken&] the token used to read from in * @throws XBull * * Called whenever a file name needs to be read from either * the command line or a file. Expects next token to be "=" * followed by the token representing the file name. * Call. (I've relaxed the requirement for =, MTH) * this function after, say, the keyword "file" has been * read in the following LOG command: *
 * log file=doofus.txt start replace;
 * 
* Note that this function will read only "=doofus.txt " * leaving "start replace;" in the stream for reading * at a later time. */ std::string Bull::GetFileName( NexusToken& token ) { // Eat the equals sign if present // token.GetNextToken(); // Now get the filename itself // if ( token.Equals("=") ) token.GetNextToken(); return token.GetToken(); } long Bull::GetLongInteger( NexusToken& token ) { // Eat the equals sign if present // token.GetNextToken(); // Now get the filename itself // if ( token.Equals("=") ) token.GetNextToken(); if (token.IsInteger()) return token.GetLongEquivalent(); else {errormsg="Expecting integer but found "; errormsg+=token.GetToken(); throw XBull(errormsg, token); } return 0; } double Bull::GetDouble( NexusToken& token ) { // Eat the equals sign if present // token.GetNextToken(); // Now get the filename itself // if ( token.Equals("=") ) token.GetNextToken(); if (IsDouble(token.GetToken())) return atof(token.GetToken().c_str()); else {errormsg="Expecting a number but found but found "; errormsg+=token.GetToken(); throw XBull(errormsg, token); } return 0; } void Bull::OutputComment( std::string s ) { cout << endl; cout << s << endl; logf << endl; logf << s << endl; } void Bull::ExitingBlock( std::string blockName ) { cout << "Finished with \"" << blockName << "\" block." << endl; logf << "Finished with \"" << blockName << "\" block." << endl; }