// This file is part of BULL, a program for phylogenetic simulations // most of the code was written by Mark T. Holder. // This program is for internal use by the lab of Dr. Tandy Warnow only. // Do not redistribute the code. It is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // Some of the code is from publically available source by Paul Lewis, Ziheng Yang, // John Huelsenbeck, David Swofford , and others (as noted in the code). // In fact the main structure of the program was created by modifying Paul Lewis' // basiccmdline.cpp from his NCL // // This code was used in Mark's dissertation, some changes were made in order to // get it to compile on gcc. It is possible that this porting introduced bugs (very // little debugging has been done on UNIX platforms). I would suggest checking // the simulator by generating data on trees with short branches, etc. #include #include #include "bull_kernel.hpp" #include "bull_parsers.hpp" #include "string_extensions.hpp" #include "tree.hpp" #include "xbull.hpp" using namespace std; using namespace bull; extern long gseed; void CenterOverflowMultipliers(double *mults,double *previousLikelihoods,int naa,int ntax); double DistanceBasesMoved(double *curr,double *prev); double DistanceRateParamsMoved(double *curr,double *prev); double EuclideanDistance(int arrSize,double *f,double *s); void NormalizeGTRParams(double *curr,double *currNorm); BullKernel::BullKernel() :charsOpen(false), modelsOpen(false), taxaOpen(false), treesOpen(false) { Reset(); notifyListeners(BullListener::kInitialized); } BullKernel::~BullKernel() { notifyListeners(BullListener::kDying); this->deleteModels(false); this->deleteTrees(false); simSettings.Reset(); notifyListeners(BullListener::kDead); } void BullKernel::Reset() { logEachStep = true;//TEMPORARY criterion = Criterion(MAX_LIKE); this->deleteModels(false); this->deleteTrees(false); workingOnBranches = false; nParamDirectionsToSkip = 0; nParamGroupsToSkip = 0; nBranchesToSkip = 0; lastParamImprov = kMaxDouble; lastBranchImprov = kMaxDouble; analysisMode = AnalysisMode(INFERENCE); numSimulationConditionsDone = 0; } void BullKernel::deleteTrees(bool leaveOpen) { if (treeList.empty()) return; openTrees(); try { std::list::iterator tIt = this->treeList.begin(); for (; tIt != this->treeList.end(); ++tIt) delete *tIt; this->treeList.clear(); } catch (...) { if (!leaveOpen) closeTrees(); } } void BullKernel::deleteModels(bool leaveOpen) { openModels(); try { vector::iterator mIt = this->modVec.begin(); for (; mIt != this->modVec.end(); ++mIt) delete *mIt; this->modVec.clear(); simSettings = SimSettings(); } catch (...) { if (!leaveOpen) closeModels(); } } //////////////////////////////////////////////////////////////////////////////// /// Assumes control of the trees in `treesToBeAdded` according to `updateMode` /// \returns a vector of trees that the caller is responsible for deleting //////////////////////////////////////////////////////////////////////////////// std::vector BullKernel::updateTrees( const UpdateMode updateMode, std::vector treesToBeAdded) { if (updateMode == kIgnoreUpdateMode) return treesToBeAdded; BullAttributeManager bam(BullAttribute(kTrees), *this); if (updateMode == kClearUpdateMode || updateMode == kReplaceUpdateMode) { this->deleteTrees(true); } else if (updateMode != kAppendUpdateMode) { string msg = "Tree storage mode "; AppendInt(msg, (int) updateMode); msg.append(" is not currently supported."); throw XBull(msg); } if (updateMode == kClearUpdateMode) return treesToBeAdded; // Transfer ownership of the trees to treeList std::vector unused; for (std::vector::iterator i = treesToBeAdded.begin(); i != treesToBeAdded.end(); ++i) treeList.push_back(*i); notifyListeners(BullListener::kTaxaAdded); return unused; } void BullKernel::notifyListeners(BullListener::Event e, void *blob) { void * t = (void *) this; for (std::set::iterator i = listeners.begin(); i != listeners.end(); ++i) (*i)->stateChanged(e, t, blob); } #if 0 void BullKernel::ExecSimulate(const SimulateOpts & simOptions ) { analysisMode=amode(SIMULATION); criterion=criteria(maxLike); token.GetNextToken(); long nSimChars=0,nReps; unsigned concatenations=0; int nOutputs=1,noutputTypes=0,noutputFilenames=0,npbfilenames=0; vector pbfilename; vector filename; vector outTypes; Tree *temptre; vector toscoreT; vector toscoreN; bool automatic=false; std::string tagname; SSettings->setAppendToOut(true);//overwrite option isn't persistent bool collapsingImpossibleBr=false; ofstream collf; std::string collfname; while (token.GetToken()!=";") { bool needAnotherToken=true; if (token.Abbreviation("NReps")) nReps=GetLongInteger(token); else if (token.Abbreviation("COLLapsefile")) { collapsingImpossibleBr=true; collfname=GetFileName(token); } else if (token.Abbreviation("NChars")) nSimChars=GetLongInteger(token); else if (token.Abbreviation("Concat")) concatenations=GetLongInteger(token); else if (token.Abbreviation("NOUTput")) { nOutputs=GetLongInteger(token); if (nOutputs < 1) { errormsg="nOutputs must be >0"; throw XBull( errormsg, token); } if (nOutputs > 1 && (noutputTypes > 0 || noutputFilenames > 0 || npbfilenames > 0)) { errormsg="if nOutputs > 1, it must be specified before filenames, paupblockfilenames and outputtypes"; throw XBull( errormsg, token); } } else if (token.Abbreviation("Paupblockfile")) { token.GetNextToken(); if (token.Equals("=")) token.GetNextToken(); if (token.Equals("(")) token.GetNextToken(); for (int i=0;i < nOutputs;i++) { npbfilenames++; pbfilename.push_back(token.GetToken()); token.GetNextToken(); } if (token.Equals(")")) token.GetNextToken(); needAnotherToken=false; } else if (token.Abbreviation("OVERWrite"))//not persistent SSettings->setAppendToOut(false); else if (token.Abbreviation("File")) { token.GetNextToken(); if (token.Equals("=")) token.GetNextToken(); if (token.Equals("(")) token.GetNextToken(); for (int i=0;i < nOutputs;i++) { noutputFilenames++; filename.push_back(token.GetToken()); token.GetNextToken(); } if (token.Equals(")")) token.GetNextToken(); needAnotherToken=false; } else if (token.Abbreviation("TAg")) tagname=GetFileName(token); else if (token.Abbreviation("AUTOmatic")) automatic=true; else if (token.Abbreviation("OUTputtypes")) { token.GetNextToken(); if (token.Equals("=")) token.GetNextToken(); if (token.Equals("(")) token.GetNextToken(); for (int i=0;i < nOutputs;i++) { noutputTypes++; if (token.Abbreviation("PROtein") || token.Abbreviation("AMino")) outTypes.push_back(EncodingType(AminoAcid)); else if (token.Abbreviation("Dna")) outTypes.push_back(EncodingType(DNANoGap)); else { errormsg="The output type "; errormsg+=token.GetToken(); errormsg+=" is invalid"; throw XBull( errormsg, token); } token.GetNextToken(); } if (token.Equals(")")) token.GetNextToken(); needAnotherToken=false; } else { if(token.IsInteger()){ long tn=token.GetLongEquivalent(); cout< ntrees || tn < 1) { errormsg="Tree "; errormsg+=token.GetToken(); errormsg+=" unknown. simulation aborted."; throw XBull( errormsg, token); } temptre=treelist[tn-1]; } else { try { temptre=FindTreeFromName(token.GetToken()); } catch (NoSuchTree) { errormsg="Tree "; errormsg+=token.GetToken(); errormsg+=" unknown. simulation aborted."; throw XBull( errormsg, token); } } toscoreN.push_back(token.GetToken());//keeps track of what the user called the tree toscoreT.push_back(temptre); } if (needAnotherToken) token.GetNextToken(); } if ((!concatenations && !nSimChars) || (concatenations && nSimChars)) { errormsg="Either the number of characters or the number of concatenations must be specified"; throw XBull( errormsg, token); } if (outTypes.size() == 0){ //dna is the default output type for (int i=0;i < nOutputs;i++) outTypes.push_back(EncodingType(DNANoGap)); } SSettings->setNumOutputs(nOutputs); for (int i=0;i < nOutputs;i++) { SSettings->SetOutputDatatype(i,outTypes[i]); pbfilename.push_back("CurrentlyUnusedVariable"); } //TEMPORARY lots of the interaction with SSettings assumes that you are using SSRFCodonSubModel if (!modArr) { errormsg="You can't simulate a codon model without first declaring the models using codLikeStartVal"; throw XBull( errormsg, token); } if (concatenations == 0) { if(nSimChars%(3*(*modArr)->nCodonsInProtein)) { errormsg="The number of characters must be a multiple of the number of amino acids"; throw XBull( errormsg, token); } concatenations = (3*(*modArr)->nCodonsInProtein)/nSimChars; } SSRFCodonSubMod **tempMod; tempMod=modArr; ModifyBlenMultSoBranchesAreScaled(modArr,(*modArr)->nCodonsInProtein); for (int k = 0; k < (*modArr)->nCodonsInProtein; k++) { (*tempMod)->ResizeModel(); SSettings->AddPartSetting(new PartSettings(k,k+1),(Model **) tempMod++,1); } SSettings->setNumConcats(concatenations); SSettings->setNumReps(nReps); const unsigned cnr = (concatenations * nReps); const unsigned nsao = (cnr < 100 ? cnr : 100); SSettings->setNumSimsAtOnce(nsao); assert(!collapsingImpossibleBr || (SSettings->getNumSimsAtOnce() == 1 && collfname.length() > 0)); if (toscoreT.empty()) { for (std::size_t i = 0; i < treelist.size(); i++) { toscoreT.push_back(treelist[i]); std::string s; AppendInt(s, (int)i); toscoreN.push_back(s); } } if (collapsingImpossibleBr) { collf.open(collfname.c_str()); assert(collf.good()); toscoreT[0]->PrintTaxaBlock(collf); collf<<"endblock;\n\nbegin trees;\n"; } time_t bef,aft; int num; long seedatbeg; for (std::size_t i = 0; i < toscoreT.size(); i++) { cout<hasBranchLengths)) { //weak test (doesn't look at all partitions) errormsg="The tree must have branch lengths in order to simulate"; throw XBull( errormsg, token); } GetTreeReadyToSimulate(toscoreT[i]); int ndone=0; while (nReps > ndone) { const unsigned ntodothisRound = std::min((unsigned)(nReps-ndone), SSettings->getNumSimsAtOnce()); seedatbeg=gseed; toscoreT[i]->SimulateData(ntodothisRound);//done in batches to speed things up if (automatic) { for (int nofs=0;nofs < nOutputs;nofs++) { assert(tagname.length() > 0); assert(nReps == 1); std::string thisRepsOutTreeFile,thisRepsOutFile; if (outTypes[nofs] == EncodingType(DNANoGap)) thisRepsOutFile="dna"; else if (outTypes[nofs] == EncodingType(AminoAcid)) thisRepsOutFile="prot"; else assert(0); if (i != 0) SSettings->setAppendToOut(true); thisRepsOutFile+=tagname; thisRepsOutTreeFile=thisRepsOutFile; thisRepsOutTreeFile+="Tree"; thisRepsOutTreeFile+=(i+1); thisRepsOutTreeFile+=".tre"; if (!ndone) WriteSimulations(ntodothisRound,toscoreT[i],thisRepsOutFile,thisRepsOutTreeFile,outTypes[nofs],ndone,seedatbeg,numSimulationConditionsDone,true); else WriteSimulations(ntodothisRound,toscoreT[i],thisRepsOutFile,thisRepsOutTreeFile,outTypes[nofs],ndone,seedatbeg,numSimulationConditionsDone,true); } } else { for (int nofs=0;nofs < nOutputs;nofs++) WriteSimulations(ntodothisRound,toscoreT[i],filename[nofs],pbfilename[nofs],outTypes[nofs],ndone,seedatbeg,numSimulationConditionsDone); } if (collapsingImpossibleBr) { std::string oriname; oriname=toscoreT[i]->GetName(); std::string temp="CollapsedModel"; temp+=i+1; toscoreT[i]->SetName(temp); toscoreT[i]->PrintBranchesWithMuts(collf,true); toscoreT[i]->SetName(oriname); } ndone+=ntodothisRound; } ofstream outpf; for (int nofs=0;nofs < nOutputs;nofs++) { outpf.open(filename[nofs].c_str(),ios::app); outpf<<"BEGIN PAUP;\n\texecute postDataPaup.nex;\nEND;\n"; outpf.close(); } DestroySimulationAttributes(toscoreT[i]); } if (collapsingImpossibleBr) collf<<"end;"<Reset(); } void Bull::ExecCodLikeStartValCommand(const CodLikeStartOpts & clso) { int currbrlen=-1,nst=-1,naa=-1; double *sharedModParam,kapp=-1.0; workingOnBranches=false; sharedModParam=new double [10]; for (int i=0;i < 10;i++) sharedModParam[i]=-1.0; double **aafreq,*mults; double treesc=1.0; int code; //#ifdef ALLOWMULTIHITS double mh=0.0; //#endif aafreq=NULL; mults=NULL; HandleCodLikeStartVal(token,currbrlen,nst,naa,sharedModParam,kapp,aafreq,mults,treesc,code,mh); delete []sharedModParam; delete []mults; free_RectMats(aafreq); } void Bull::HandleCodLikeStartVal(NexusToken& token,int& currbrlen,int& nst,int& naa,double *&sharedModParam, double& kapp,double **&aafreq,double *&mults,double& treesc,int &code,double &mh) { delete[] previousLikelihoods; previousLikelihoods=NULL; nParamDirectionsToSkip=0; nParamGroupsToSkip=0; nBranchesToSkip=0; paramImprovThisRound=0.0; ParseCodLikeStartValCommand(token,currbrlen,nst,naa,sharedModParam,kapp,aafreq,mults,treesc,code,mh); if (previousLikelihoods) CenterOverflowMultipliers(mults,previousLikelihoods,naa,1610); //TEMPORARY should be modified to allow parameter/options to be persistent if (nst == 1) {sharedModParam[3]=sharedModParam[4]=sharedModParam[5]=sharedModParam[6]=sharedModParam[7]=sharedModParam[8]=1.0;} else if (nst == 2) {if(kapp<-1) {if(sharedModParam[3]<0.0) { delete []mults; delete [] previousLikelihoods; previousLikelihoods=NULL; free_RectMats(aafreq); throw XBull("Kappa must be specified option to CodLikeStartVal command"); } else if (fabs(sharedModParam[3]-sharedModParam[5]) > 0.00001 || fabs(sharedModParam[3]-sharedModParam[6]) > 0.00001|| fabs(sharedModParam[3]-sharedModParam[8]) > 0.00001 || fabs(sharedModParam[4]-sharedModParam[7]) > 0.00001) { delete []mults; delete [] previousLikelihoods; previousLikelihoods=NULL; free_RectMats(aafreq); throw XBull("The specified rmatrix doenst conform to K2P in CodLikeStartVal command"); } sharedModParam[4]/=sharedModParam[3]; sharedModParam[7]/=sharedModParam[3]; sharedModParam[3]=sharedModParam[4]=sharedModParam[5]=sharedModParam[8]=1.0; } else {sharedModParam[3]=sharedModParam[5]=sharedModParam[6]=sharedModParam[8]=1.0; sharedModParam[4]=sharedModParam[7]=kapp; } } else if (nst == 6) { if(sharedModParam[3]<0.0) { delete []mults; delete [] previousLikelihoods; previousLikelihoods=NULL; free_RectMats(aafreq); throw XBull("RMatrix must be specified option to CodLikeStartVal command"); } } else if (kapp < 0.0) { if(sharedModParam[3]<0.0) { sharedModParam[3]=sharedModParam[4]=sharedModParam[5]=sharedModParam[6]=sharedModParam[7]=sharedModParam[8]=1.0; } } else { sharedModParam[3]=sharedModParam[5]=sharedModParam[6]=sharedModParam[8]=1.0; sharedModParam[4]=sharedModParam[7]=kapp; } if (sharedModParam[1]<0.0) { sharedModParam[0]=sharedModParam[1]=sharedModParam[2]=0.25; } if (naa < 1) { delete []mults; delete [] previousLikelihoods; previousLikelihoods=NULL; free_RectMats(aafreq); throw XBull("You must specify the number of amino acid freqs in CodLikeStartVal command"); } #ifdef ALLOWMULTIHITS sharedModParam[9]=mh; #endif Model **arrOfMods; arrOfMods=new Model*[naa]; modArr=new SSRFCodonSubMod*[naa]; sizeOfModArr=naa; CreateSetOfSSRFCodonSubMods(naa,sharedModParam,aafreq,modArr,mults,treesc,code); for (int k=0;k < naa;k++) arrOfMods[k]=(Model *)modArr[k]; delete []arrOfMods; } void CenterOverflowMultipliers(double *mults,double *previousLikelihoods,int naa,int ntax) { for (int i=0;i < naa;i++) {double y=exp((*previousLikelihoods++)/((double) ntax)); *mults++=y; cout<