// This file is part of BULL, a program for phylogenetic simulations // most of the code was written by Mark T. Holder. // This program is for internal use by the lab of Dr. Tandy Warnow only. // Do not redistribute the code. It is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // Some of the code is from publically available source by Paul Lewis, Ziheng Yang, // John Huelsenbeck, David Swofford , and others (as noted in the code). // In fact the main structure of the program was created by modifying Paul Lewis' // basiccmdline.cpp from his NCL // // This code was used in Mark's dissertation, some changes were made in order to // get it to compile on gcc. It is possible that this porting introduced bugs (very // little debugging has been done on UNIX platforms). I would suggest checking // the simulator by generating data on trees with short branches, etc. #include #include #include "ssrf_codon_sub_mod.hpp" #include "basic_bull.hpp" #include "lin_alg.hpp" using std::cout; using namespace bull; double SSRFCodonSubMod::defFreqA(.25); double SSRFCodonSubMod::defFreqC(.25); double SSRFCodonSubMod::defFreqG(.25); double SSRFCodonSubMod::defrAC(1); double SSRFCodonSubMod::defrAG(2); double SSRFCodonSubMod::defrAT(1); double SSRFCodonSubMod::defrCG(1); double SSRFCodonSubMod::defrCT(2); double SSRFCodonSubMod::defrGT(1); double SSRFCodonSubMod::defA(.05); double SSRFCodonSubMod::defC(.05); double SSRFCodonSubMod::defD(.05); double SSRFCodonSubMod::defE(.05); double SSRFCodonSubMod::defF(.05); double SSRFCodonSubMod::defG(.05); double SSRFCodonSubMod::defH(.05); double SSRFCodonSubMod::defI(.05); double SSRFCodonSubMod::defK(.05); double SSRFCodonSubMod::defL(.05); double SSRFCodonSubMod::defM(.05); double SSRFCodonSubMod::defN(.05); double SSRFCodonSubMod::defP(.05); double SSRFCodonSubMod::defQ(.05); double SSRFCodonSubMod::defR(.05); double SSRFCodonSubMod::defS(.05); double SSRFCodonSubMod::defT(.05); double SSRFCodonSubMod::defV(.05); double SSRFCodonSubMod::defW(.05); double SSRFCodonSubMod::defY(.05); double SSRFCodonSubMod::defStop(.00); double SSRFCodonSubMod::defRate(1); double *nucRateMat[4][4]; double *baseFreqMat[4]; bool SSRFCodonSubMod::mutParamsDirty(true); int SSRFCodonSubMod::geneticCode((int) GenCode(Mito)); int CodByaa[21][6]; int NCodByAA[21]; int CodNum[64]; int tfirbase[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3}; int tsecbase[]={0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3}; int tthibase[]={0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}; SSRFCodonSubMod::SSRFCodonSubMod(int ncods,int naas,bool *codP,bool *aaP,Parameter ** gtrParams,FreqParamGroup *nucleoFreqs,double *ssrfParams,double *vecPreAlloc,int nSitesInWholeProtein,double **mm) : ModelWEig(ncods,vecPreAlloc) { assert(sizeof(short) == 2);//if this fails SSRFCodonSubMod::NumShortsPerCharacter will have to be changed #ifdef ALLOWMULTIHITS nparams=11+naas; #else nparams=10+naas; #endif nPossAAs=naas; mutMat=mm; param=new Parameter *[nparams]; param[SSRFCodonSubMod::blenMult]=gtrParams[SSRFCodonSubMod::blenMult]; #ifdef ALLOWMULTIHITS param[SSRFCodonSubMod::pMultHit]=gtrParams[SSRFCodonSubMod::pMultHit]; #endif param[SSRFCodonSubMod::freqA]=gtrParams[SSRFCodonSubMod::freqA]; param[SSRFCodonSubMod::freqC]=gtrParams[SSRFCodonSubMod::freqC]; param[SSRFCodonSubMod::freqG]=gtrParams[SSRFCodonSubMod::freqG]; param[SSRFCodonSubMod::freqT]=gtrParams[SSRFCodonSubMod::freqT]; param[SSRFCodonSubMod::rAC]=gtrParams[SSRFCodonSubMod::rAC]; param[SSRFCodonSubMod::rAG]=gtrParams[SSRFCodonSubMod::rAG]; param[SSRFCodonSubMod::rAT]=gtrParams[SSRFCodonSubMod::rAT]; param[SSRFCodonSubMod::rCG]=gtrParams[SSRFCodonSubMod::rCG]; param[SSRFCodonSubMod::rCT]=gtrParams[SSRFCodonSubMod::rCT]; param[SSRFCodonSubMod::rGT]=gtrParams[SSRFCodonSubMod::rGT]; sharedBrLenInterpreter=gtrParams[SSRFCodonSubMod::rGT+1]; SharedConstruction(codP,aaP,nucleoFreqs,ssrfParams,nSitesInWholeProtein); } void SSRFCodonSubMod::SharedConstruction(bool *codP,bool *aaP,FreqParamGroup *nucleoFreqs,double *ssrfParams,int nSitesInWholeProtein) { overflowMultiplier=1.0; SSRkContrib=0.0; nCodonsInProtein=nSitesInWholeProtein; nfreqParamGroups=2; baseFreqs=nucleoFreqs; int i; codonLocToGlob=new int[nstates]; codLocToAminoAcidLoc=new int[nstates]; aminoAcidLocToGlob=new int[nPossAAs]; int thisCodonName=0; for (i=0;i < 64;i++) if (codP[i]) {assert(thisCodonName < nstates); codonGlobToLoc[i]=thisCodonName; codonLocToGlob[thisCodonName]=i; thisCodonName++; } else codonGlobToLoc[i]=-1; int thisAAName=0; for (i=0;i < 21;i++) if (aaP[i]) {assert(thisAAName < nPossAAs); aminoAcidLocToGlob[thisAAName]=i; thisAAName++; } for (i=0;i < nstates;i++) {int translated=CodNum[codonLocToGlob[i]]; bool found=false; for (int j=0;(!found && j < nPossAAs);j++) if (translated == aminoAcidLocToGlob[j]) {found=true; codLocToAminoAcidLoc[i]=j; } } double OneMinusOthers=1.0; for (int j=0;j < 20;j++) OneMinusOthers-=ssrfParams[j]; if (nPossAAs == 1) param[SSRFCodonSubMod::fAAs]=new FullParameter(1.0,0.0,1.0,par(MIN)|par(MAX)|par(CUR)|par(FIX),defA,"Amino"); else for (i=0;i < nPossAAs;i++) if (aminoAcidLocToGlob[i] == 20) #ifdef NO_STOP_CODONS throw BadParams("Amino Acid Freqs don't add up to 1"); #else param[SSRFCodonSubMod::fAAs+i]=new FullParameter(OneMinusOthers,0.0,1.0,par(MIN)|par(MAX)|par(CUR),defA,"Stop"); #endif else param[SSRFCodonSubMod::fAAs+i]=new FullParameter(ssrfParams[aminoAcidLocToGlob[i]],0.0,1.0,par(MIN)|par(MAX)|par(CUR),defA,"Amino"); aaFreqs=new FreqParamGroup(nPossAAs,(param+SSRFCodonSubMod::fAAs)); stateFreqs=NULL; codFreqs=new double*[nstates];//awkward because in other models the state freqs might not be contiguous in memory *codFreqs=new double[nstates]; for (int j=1;j < nstates;j++) codFreqs[j]=(codFreqs[0]+j); for (int j=0;j < nstates;j++) for (int k=0;k < nstates;k++) qMatrix[j][k]=0.0; #ifdef ELIMINATE_ALL_ZEROS maxcodonLocToGlob=new int[nstates]; maxcodLocToAminoAcidLoc=new int[nstates]; maxaminoAcidLocToGlob=new int[nPossAAs]; for (int i = 0 ;i < nstates;i++) {maxcodonLocToGlob[i]=codonLocToGlob[i]; maxcodLocToAminoAcidLoc[i]=codLocToAminoAcidLoc[i]; } for (int i = 0 ;i < nPossAAs;i++) maxaminoAcidLocToGlob[i]=aminoAcidLocToGlob[i]; for (int i=0;i < 64;i++) origCodonGlobToLoc[i]=codonGlobToLoc[i]; maxNStates=nstates; maxNPossAAs=nPossAAs; OrigParamArray=new Parameter *[nPossAAs]; for (i=0;i < nPossAAs;i++) OrigParamArray[i]=param[SSRFCodonSubMod::fAAs+i]; //ResizeModel();//don't do this so the correct size arrays of characters will be allocated in the terminal Like Attr #endif CalculateQ(); //initializes RateConst parameter and codon freqs } #ifdef ELIMINATE_ALL_ZEROS void SSRFCodonSubMod::ResizeModel() { //first step is to move all amino acids of freq 0 to the back of the param list and keep non zeros in the correct order eigencalc=qmatcalc=false; int destInd=0,i; nPossAAs=0; for (i=0;i < maxNPossAAs;i++) if (OrigParamArray[i]->val>0.0) {param[SSRFCodonSubMod::fAAs+destInd]=OrigParamArray[i]; destInd++; aminoAcidLocToGlob[nPossAAs]=maxaminoAcidLocToGlob[i]; nPossAAs++; } for (i=0;i < maxNPossAAs;i++)//not necessary just want to make sure param array has all of the parameters if (OrigParamArray[i]->val <= 0.0) {param[SSRFCodonSubMod::fAAs+destInd]=OrigParamArray[i]; destInd++; } assert(destInd == maxNPossAAs); //Fix the four code to code translation tables nstates=0; for (i=0;i < maxNStates;i++) if (OrigParamArray[maxcodLocToAminoAcidLoc[i]]->val>0.0)//if this codon is still present {codonLocToGlob[nstates]=maxcodonLocToGlob[i]; for (int j=0;j < maxNPossAAs;j++) if (OrigParamArray[maxcodLocToAminoAcidLoc[i]] == param[SSRFCodonSubMod::fAAs+j]) codLocToAminoAcidLoc[nstates]=j; nstates++; } //fill in codon Glob to loc with -1's then change all still used codons to the correct number for (i=0;i < maxNStates;i++) codonGlobToLoc[maxcodonLocToGlob[i]]=-1; for (i=0;i < nstates;i++) codonGlobToLoc[codonLocToGlob[i]]=i; //alter all of multidimensional arrays DANGER ONLY WORKS IF THEY ARE ALLOCATED PSDMATRIX STYLE for (i=1;i < nstates;i++) {pMatrix[0][i]=(pMatrix[0][i-1]+nstates); REigenVector[i]=REigenVector[i-1]+nstates; InvEigenVector[i]=InvEigenVector[i-1]+nstates; CEigenVector[i]=CEigenVector[i-1]+nstates; CInvEigenVector[i]=CInvEigenVector[i-1]+nstates; qMatrix[i]=qMatrix[i-1]+nstates; } CalculateCodonFreqs();//unnecessary as long as I'm being sloppy and calling calc codon freqs every time I calc Q } bool SSRFCodonSubMod::NeedToExpandPossibleAA() { #ifdef NO_STOP_CODONS if (maxNPossAAs < 20) {bool allcodons[64],allaminoacids[21]; for (int i=0;i < 64;i++) allcodons[i]=false; for (int i=0;i < 21;i++) allaminoacids[i]=false; for (int i = 0 ;i < nstates;i++) {allcodons[maxcodonLocToGlob[i]]=true; allaminoacids[CodNum[maxcodonLocToGlob[i]]]=true; } int newMaxNPossAAs=GetNumberOfPossibleAminoAcids(allcodons,allaminoacids); if (newMaxNPossAAs>maxNPossAAs) return true; for (int i = 0 ;i < maxNStates;i++) allcodons[maxcodonLocToGlob[i]]=false;//set all of the codons possible in the original size model back to false for (int i=0;i < 64;i++)//anything true is between to currently non zero codons and isn't included in the original model size if (allcodons[i]) return true; /*{//need to expand the model the goal is to re do everything the the constructors Got most of the way through and then realized that the data coding woudld have to fundamentally change (chars in shorts in terminal likelihood attributes are stored relative to the original model size)*/ /* int newMaxNPossCod=GetNumberOfPossibleCodons(allaminoacids); //free and reallocate all memory allocated when Model() is called free_psdmatrices(pMatrix,1); pMatrix=psdmatrices(newMaxNPossCod,1); //free and reallocate all memory allocated when ModelWEig() is called delete [] REigenValues; delete [] ImEigenValues; free_psdmatrix(REigenVector); free_psdmatrix(InvEigenVector); free_psdmatrix(CEigenVector); free_psdmatrix(CInvEigenVector); free_psdmatrix(qMatrix); delete [] EigExp; eigencalc=false; #ifdef PRESUMMING_CONSTS delete []preSummed; #endif ModelWEig::SharedConstruction(newMaxNPossCod); EigInvEigMat=preAllocEIEM; //Stuff in the SSRFCodonSubMod Constructor #ifdef ALLOWMULTIHITS nparams=11+newMaxNPossAAs; #else nparams=10+newMaxNPossAAs; #endif nPossAAs=newMaxNPossAAs; Parameter **newparams; newparam=new Parameter *[nparams]; newparam[SSRFCodonSubMod::blenMult]=param[SSRFCodonSubMod::blenMult]; #ifdef ALLOWMULTIHITS newparam[SSRFCodonSubMod::pMultHit]=param[SSRFCodonSubMod::pMultHit]; #endif newparam[SSRFCodonSubMod::freqA]=param[SSRFCodonSubMod::freqA]; newparam[SSRFCodonSubMod::freqC]=param[SSRFCodonSubMod::freqC]; newparam[SSRFCodonSubMod::freqG]=param[SSRFCodonSubMod::freqG]; newparam[SSRFCodonSubMod::freqT]=param[SSRFCodonSubMod::freqT]; newparam[SSRFCodonSubMod::rAC]=param[SSRFCodonSubMod::rAC]; newparam[SSRFCodonSubMod::rAG]=param[SSRFCodonSubMod::rAG]; newparam[SSRFCodonSubMod::rAT]=param[SSRFCodonSubMod::rAT]; newparam[SSRFCodonSubMod::rCG]=param[SSRFCodonSubMod::rCG]; newparam[SSRFCodonSubMod::rCT]=param[SSRFCodonSubMod::rCT]; newparam[SSRFCodonSubMod::rGT]=param[SSRFCodonSubMod::rGT]; //Stuff in the SSRFCodonSubMod::SharedConstruction double *newcodonLocToGlob=new int[newMaxNPossCod]; double *newcodLocToAminoAcidLoc=new int[newMaxNPossCod]; double *newaminoAcidLocToGlob=new int[newMaxNPossAAs]; int thisCodonName=0; for (i=0;i < 64;i++) if (allcodons[i]) {assert(thisCodonName < newMaxNPossCod); newcodonGlobToLoc[i]=thisCodonName; newcodonLocToGlob[thisCodonName]=i; thisCodonName++; } else newcodonGlobToLoc[i]=-1; int thisAAName=0; for (i=0;i < 21;i++) if (allaminoacids[i]) {assert(thisAAName < newMaxNPossAAs); newaminoAcidLocToGlob[thisAAName]=i; thisAAName++; } for (i=0;i < newMaxNPossCod;i++) {int translated=CodNum[newcodonLocToGlob[i]]; bool found=false; for (int j=0;(!found && j < newMaxNPossAAs);j++) if (translated == newaminoAcidLocToGlob[j]) {found=true; newcodLocToAminoAcidLoc[i]=j; } } int oldindex=0; for (i=0;i < newMaxNPossAAs;i++) {if(newaminoAcidLocToGlob[i] == aminoAcidLocToGlob[oldindex]) {newparam[SSRFCodonSubMod::fAAs+i]=OrigParamArray[oldindex]; oldindex++; } else newparam[SSRFCodonSubMod::fAAs+i]=new FullParameter(0.0,0.0,1.0,par(MIN)|par(MAX)|par(CUR),defA,"Amino"); } delete param param=newparam; delete aaFreqs; aaFreqs=new FreqParamGroup(newMaxNPossAAs,(param+SSRFCodonSubMod::fAAs)); delete [] codonLocToGlob; codonLocToGlob = newcodonLocToGlob; delete [] codLocToAminoAcidLoc; codLocToAminoAcidLoc = newcodLocToAminoAcidLoc; delete [] aminoAcidLocToGlob; aminoAcidLocToGlob= newaminoAcidLocToGlob; stateFreqs=NULL; delete [] *codFreqs; delete [] codFreqs; codFreqs=new double*[newMaxNPossCod];//awkward because in other models the state freqs might not be contiguous in memory *codFreqs=new double[newMaxNPossCod]; for (int j=1;j < newMaxNPossCod;j++) codFreqs[j]=(codFreqs[0]+j); for (int j=0;j < newMaxNPossCod;j++) for (int k=0;k < newMaxNPossCod;k++) qMatrix[j][k]=0.0; #ifdef ELIMINATE_ALL_ZEROS delete [] maxcodonLocToGlob; maxcodonLocToGlob=new int[newMaxNPossCod]; delete [] maxcodLocToAminoAcidLoc; maxcodLocToAminoAcidLoc=new int[newMaxNPossCod]; delete [] maxaminoAcidLocToGlob; maxaminoAcidLocToGlob=new int[nPossAAs];//this is about as far as I got when I realized that I was screwed for (int i = 0 ;i < nstates;i++) {maxcodonLocToGlob[i]=codonLocToGlob[i]; maxcodLocToAminoAcidLoc[i]=codLocToAminoAcidLoc[i]; } for (int i = 0 ;i < nPossAAs;i++) maxaminoAcidLocToGlob[i]=aminoAcidLocToGlob[i]; for (int i=0;i < 64;i++) origCodonGlobToLoc[i]=codonGlobToLoc[i]; maxNStates=nstates; maxNPossAAs=nPossAAs; OrigParamArray=new Parameter *[nPossAAs]; for (i=0;i < nPossAAs;i++) OrigParamArray[i]=param[SSRFCodonSubMod::fAAs+i]; //ResizeModel();//don't do this so the correct size arrays of characters will be allocated in the terminal Like Attr #endif CalculateQ(); //initializes RateConst parameter and codon freqs }*/ } return true; return false; #else assert(0); #endif } #endif void SSRFCodonSubMod::CalculateCodonFreqs() { double *codF; codF=*codFreqs; double numerators[6]; double tot=0.0; for (int thisaa=0;thisaa < nPossAAs;thisaa++) {double aaf=param[SSRFCodonSubMod::fAAs+thisaa]->val; int ndegcod=NCodByAA[aminoAcidLocToGlob[thisaa]]; double denominator=0.0; for (int thiscodon=0;thiscodon < ndegcod;thiscodon++) {int thisglobcod=CodByaa[aminoAcidLocToGlob[thisaa]][thiscodon]; assert(thisglobcod!=65); denominator+=numerators[thiscodon]=*baseFreqMat[tfirbase[thisglobcod]]**baseFreqMat[tsecbase[thisglobcod]]**baseFreqMat[tthibase[thisglobcod]]; } for (int thiscodon=0;thiscodon < ndegcod;thiscodon++) {int thisglobcod=CodByaa[aminoAcidLocToGlob[thisaa]][thiscodon]; assert(thisglobcod!=65); tot+=codF[codonGlobToLoc[thisglobcod]]=aaf*numerators[thiscodon]/denominator; } } if (!(fabs(tot-1.0)<0.00000001)) throw MTHException("Codon Frequencies don't add up to 1"); } SSRFCodonSubMod::~SSRFCodonSubMod() { for (int i=SSRFCodonSubMod::fAAs;i < nparams;i++)//don't delete the shared GTR parameters delete param[i]; delete [] param; EigInvEigMat=NULL;//shared memory delete [] codonLocToGlob; delete [] aminoAcidLocToGlob; delete [] codLocToAminoAcidLoc; #ifdef ELIMINATE_ALL_ZEROS delete [] maxcodonLocToGlob; delete [] maxcodLocToAminoAcidLoc; delete [] maxaminoAcidLocToGlob; delete [] OrigParamArray; #endif } void SSRFCodonSubMod::DeleteSharedMemory() { delete [] EigInvEigMat; } //////////////////////////////////////////////////////////////////////////////// // Beta is a factor that is multiplied to the branchlength to make the eqns in // Molecular Systematics work. This the constraint that Sum qii * freq(i) = -1 this ensures that the branches are in length // that are expected numbers of changes // SSRkContrib is the defined k constant, this function maintains the rates // For GTR the rate params are scaled down to maintain their same ratio, but make beta=1.0 //////////////////////////////////////////////////////////////////////////////// double SSRFCodonSubMod::CalculateBeta() { double x = 0.0; double *qm = *qMatrix; //assumes that calculate code freq; and that the rows sum to zero double *codF = *codFreqs; for (int row=0; row .05"); } tempp=mutMat[0]; // now treating param[SSRFCodonSubMod::pMultHit] as a multiplier to modify the rate of double/ triple hits //its affect is dependent on PMUT (which is a define so it won't change in a run) if param[SSRFCodonSubMod::pMultHit]is //chance of one mut is PMUT, double is simply (PMUT^2)* param[SSRFCodonSubMod::pMultHit] , three is (PMUT^3) * param[SSRFCodonSubMod::pMultHit]^2, //this is a quasi-probability based method done to mimic AH's implementation (if set to 1.0 you get his version) for (i=0;i < 4;i++) for (j=0;j < 4;j++) for (k=0;k < 4;k++) for (l=0;l < 4;l++) for (m=0;m < 4;m++) for (n=0;n < 4;n++) {*tempp=ssmm[4*i+l]*ssmm[4*j+m]*ssmm[4*k+n]; if (i!=l) {if(j!=m) {if(k!=n) *tempp*=(param[SSRFCodonSubMod::pMultHit]->val)*(param[SSRFCodonSubMod::pMultHit]->val); else *tempp*=(param[SSRFCodonSubMod::pMultHit]->val); } else {if(k!=n) *tempp*=(param[SSRFCodonSubMod::pMultHit]->val); } } else {if(j!=m) {if(k!=n) *tempp*=(param[SSRFCodonSubMod::pMultHit]->val); } } tempp++; } //last step to scale mut rates up to reasonable values (weighted mu is 1.0) tempp=mutMat[0]; temp=0.0; int row=0; for (i=0;i < 4;i++) for (j=0;j < 4;j++) for (k=0;k < 4;k++) {mutMat[row][row]=0.0; for (l=0;l < 64;l++) if (l!=row) mutMat[row][row]-=mutMat[row][l]; temp-=mutMat[row][row]**baseFreqMat[i]**baseFreqMat[j]**baseFreqMat[k]; row++; } for (int i=0;i < 64;i++) for (int j=0;j < 64;j++) mutMat[i][j]/=temp; #else //New version of multihits, doesn't rely on quasi discrete approximation. all single mutation events // occur at 1-PrMultiHit-PrMultiHit^2, and all double hits occur at PrMultiHit, and all triple hits occur at PrMultiHit^2 double *tempp; tempp=mutMat[0]; double pmhsq; pmhsq=(param[SSRFCodonSubMod::pMultHit]->val)*(param[SSRFCodonSubMod::pMultHit]->val); double psingle=1.0-(param[SSRFCodonSubMod::pMultHit]->val)-pmhsq; // first step to scale mut rates up to consistent meaning to the MultiHit parameter prob not necessary because matrix is scaled again double scalednrm[4][4]; double temp=0.0; for (int i=0;i < 4;i++) for (j=0;j < 4;j++) {if(i!=j) {scalednrm[i][j]=*nucRateMat[i][j]; temp+=scalednrm[i][j]**baseFreqMat[j]**baseFreqMat[i]; } else scalednrm[i][j]=1.0; } for (int i=0;i < 4;i++) for (j=0;j < 4;j++) {if(i!=j) scalednrm[i][j]/=temp; } for (i=0;i < 4;i++) for (j=0;j < 4;j++) for (k=0;k < 4;k++) for (l=0;l < 4;l++) for (m=0;m < 4;m++) for (n=0;n < 4;n++) {//OLD NEW WAY allows first and third to be mutated as often as first and second or second and third /* *tempp=1.0; int nmuts=0; if (i!=l) {*tempp*=scalednrm[i][l]**baseFreqMat[l]; nmuts++; } if (j!=m) {*tempp*=scalednrm[j][m]**baseFreqMat[m]; nmuts++; } if (k!=n) {*tempp*=scalednrm[k][n]**baseFreqMat[n]; nmuts++; } if (nmuts == 1) *tempp*=psingle; else if (nmuts == 2) *tempp*=(param[SSRFCodonSubMod::pMultHit]->val); else if (nmuts == 3) *tempp*=pmhsq; tempp++; */ if (i!=l) //first changed {if(j!=m) //second changed {if(k!=n) // all three changed *tempp++=scalednrm[i][l]**baseFreqMat[l]*scalednrm[j][m]**baseFreqMat[m]*scalednrm[k][n]**baseFreqMat[n]*pmhsq; else //first and second changed *tempp++=scalednrm[i][l]**baseFreqMat[l]*scalednrm[j][m]**baseFreqMat[m]*(param[SSRFCodonSubMod::pMultHit]->val); } else {if(k!=n) //first and third changed - happens more or less at the at the three mutation rate *tempp++=scalednrm[i][l]**baseFreqMat[l]*scalednrm[k][n]**baseFreqMat[n]*pmhsq; else *tempp++=scalednrm[i][l]**baseFreqMat[l]*psingle; //just first changed } } else {if(j!=m) //sec changed {if(k!=n) //sec and third changed *tempp++=scalednrm[j][m]**baseFreqMat[m]*scalednrm[k][n]**baseFreqMat[n]*(param[SSRFCodonSubMod::pMultHit]->val); else //just sec changed *tempp++=scalednrm[j][m]**baseFreqMat[m]*psingle; } else {if(k!=n) //just third changed *tempp++=scalednrm[k][n]**baseFreqMat[n]*psingle; else //none changed - on diagonal set to zero *tempp++=0.0; } } } //last step to scale mut rates up to reasonable values (weighted mu is 1.0) tempp=mutMat[0]; temp=0.0; int row=0; for (i=0;i < 4;i++) for (j=0;j < 4;j++) for (k=0;k < 4;k++) {mutMat[row][row]=0.0; for (l=0;l < 64;l++) if (l!=row) mutMat[row][row]-=mutMat[row][l]; temp-=mutMat[row][row]**baseFreqMat[i]**baseFreqMat[j]**baseFreqMat[k]; row++; } for (int i=0;i < 64;i++) for (int j=0;j < 64;j++) mutMat[i][j]/=temp; #endif #else double *tempp; tempp=mutMat[0]; for (i=0;i < 4;i++) for (j=0;j < 4;j++) for (k=0;k < 4;k++) for (l=0;l < 4;l++) for (m=0;m < 4;m++) for (n=0;n < 4;n++) if (i!=l) if (j!=m || k!=n) *tempp++=0.0; else *tempp++=*nucRateMat[i][l]**baseFreqMat[l]; else if (j!=m) if (k!=n) *tempp++=0.0; else *tempp++=*nucRateMat[j][m]**baseFreqMat[m]; else if (k!=n) *tempp++=*nucRateMat[k][n]**baseFreqMat[n]; else *tempp++=0.0; //last step to scale mut rates up to reasonable values (weighted mu is 1.0) tempp=mutMat[0]; double temp=0.0; int row=0; for (i=0;i < 4;i++) for (j=0;j < 4;j++) for (k=0;k < 4;k++) {mutMat[row][row]=0.0; for (l=0;l < 64;l++) if (l!=row) mutMat[row][row]-=mutMat[row][l]; temp-=mutMat[row][row]**baseFreqMat[i]**baseFreqMat[j]**baseFreqMat[k]; row++; } for (int i=0;i < 64;i++) for (int j=0;j < 64;j++) mutMat[i][j]/=temp; #endif mutParamsDirty=false; } void SSRFCodonSubMod::CalculateQ(void) { sharedBrLenInterpreter->val-=SSRkContrib/nCodonsInProtein;//subtract out contribution of old qmatrix to mean rate of change /*double rpAC=param[SSRFCodonSubMod::rAC]->val; double rpAG=param[SSRFCodonSubMod::rAG]->val; double rpAT=param[SSRFCodonSubMod::rAT]->val; double rpCG=param[SSRFCodonSubMod::rCG]->val; double rpCT=param[SSRFCodonSubMod::rCT]->val; double rpGT=param[SSRFCodonSubMod::rGT]->val; double fpA=param[SSRFCodonSubMod::freqA]->val; double fpC=param[SSRFCodonSubMod::freqC]->val; double fpG=param[SSRFCodonSubMod::freqG]->val; double fpT=param[SSRFCodonSubMod::freqT]->val;*/ double *qm,tempx; double *codF; CalculateCodonFreqs(); codF=*codFreqs; if (mutParamsDirty) RecalculateMutMatrix(); int nsq=nstates*nstates; qm=qMatrix[0]; for ( int i=0;i < nsq;i++) *qm++=0.0; int currFromFirBase,currFromSecBase,currFromThiBase; int globTocod,globFromcod,locTocod,locToAA,locFromAA; #ifdef ALLOWMULTIHITS int currToFirBase,currToSecBase,currToThiBase; for (int row=0;row < nstates;row++) {qMatrix[row][row]=0.0; locFromAA=codLocToAminoAcidLoc[row]; if (param[SSRFCodonSubMod::fAAs+locFromAA]->val>0.0) {globFromcod=codonLocToGlob[row]; currFromFirBase=tfirbase[globFromcod]; currFromSecBase=tsecbase[globFromcod]; currFromThiBase=tthibase[globFromcod]; for (currToFirBase=0;currToFirBase < 4;currToFirBase++) for (currToSecBase=0;currToSecBase < 4;currToSecBase++) for (currToThiBase=0;currToThiBase < 4;currToThiBase++) {globTocod=16*currToFirBase + 4*currToSecBase + currToThiBase; locTocod=codonGlobToLoc[globTocod]; if (locTocod!=-1 && locTocod!=row) //if amino acid could have freq >0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; if (locToAA == locFromAA) //same amino acid qMatrix[row][row]-=qMatrix[row][locTocod]=mutMat[globFromcod][globTocod]; else if (codF[locTocod]val>0.0) {globFromcod=codonLocToGlob[row]; currFromFirBase=tfirbase[globFromcod]; currFromSecBase=tsecbase[globFromcod]; currFromThiBase=tthibase[globFromcod]; //try mutating first base for (currToBase=0;currToBase < 4;currToBase++) if (currToBase!=currFromFirBase) //only check if it is a mutant {globTocod=16*currToBase + 4*currFromSecBase + currFromThiBase; locTocod=codonGlobToLoc[globTocod]; if (locTocod!=-1) //if amino acid could have freq >0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; if (locToAA == locFromAA) //same amino acid {qMatrix[row][row]-=qMatrix[row][locTocod]=mutMat[globFromcod][globTocod]; } else if (codF[locTocod]0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; if (locToAA == locFromAA) //same amino acid {qMatrix[row][row]-=qMatrix[row][locTocod]=*nucRateMat[currFromSecBase][currToBase]**baseFreqMat[currToBase]; } else if (codF[locTocod]0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; if (locToAA == locFromAA) //same amino acid {qMatrix[row][row]-=qMatrix[row][locTocod]=mutMat[globFromcod][globTocod]; } else if (codF[locTocod]0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; if (locToAA == locFromAA) //same amino acid {qMatrix[row][row]-=qMatrix[row][locTocod]=mutMat[globFromcod][globTocod]; } else if (codF[locTocod]val+=SSRkContrib/nCodonsInProtein;//add in the contribution of the new qmatrix to mean rate of change /*ofstream tempout("Bullout",ios::app); tempout<StartWithCurrent()) ; else if (param[i]->StartWithRandom()) throw IncompleteModel("Random Function to initialize parameters isn't available yet"); //param[i]->val=SomeRandomNumberFunction(); else if (param[i]->StartWithApproximation()) throw IncompleteModel("Initial approximation of parameters isn't available yet"); else if (param[i]->StartWithDefault()) param[i]->SetToDefault(); else throw BadSettings("No starting value of a parameter has been defined"); } } void SSRFCodonSubMod::EncodeACharacter(short *dest,short *raw,int datatype,bool keepGap) { assert((datatype == EncodingType(MitoCodons) || datatype == EncodingType(NucCodons)) && !keepGap); //assumes that a codon code will take up 4 shorts; assert(sizeof(short) == 2); short *temp; temp=dest; *dest=0; short smask,fb,sb,tb; for (int i=0;i < nstates;i++) {fb=sb=tb=1; fb <<= tfirbase[codonLocToGlob[i]]; sb <<= tsecbase[codonLocToGlob[i]]; tb <<= tthibase[codonLocToGlob[i]]; if (i%16 == 0) {smask=1; if (i) *++dest=0; } else smask <<= 1; if ((fb&raw[0]) && (sb&raw[1]) && (tb&raw[2])) (*dest)|=smask; } bool atLeastOne=false; for (int i=0;(!atLeastOne && i < nstates);i++) if (temp[i]) atLeastOne=true; if (!atLeastOne) temp[0]=0; } void SSRFCodonSubMod::UpdatePmat(double b) { ModelWEig::UpdatePMatrix(*pMatrix,param[SSRFCodonSubMod::blenMult]->val*b); } void SSRFCodonSubMod::UpdatePmat(double b,int onlycol) { ModelWEig::UpdatePMatrix(*pMatrix,param[SSRFCodonSubMod::blenMult]->val*b,onlycol); } void SSRFCodonSubMod::UpdatePMatrix(double **pm,double b) { ModelWEig::UpdatePMatrix(pm,param[SSRFCodonSubMod::blenMult]->val*b); } void SSRFCodonSubMod::UpdatePMatrix(double **pm,double b,int onlycol) { ModelWEig::UpdatePMatrix(pm,param[SSRFCodonSubMod::blenMult]->val*b,onlycol); } void SSRFCodonSubMod::UpdatePRow(double **pm,double b,int onlycol) { ModelWEig::UpdatePRow(pm,param[SSRFCodonSubMod::blenMult]->val*b,onlycol); } void SSRFCodonSubMod::UpdatePmatWithOutSharedMatrix(double **pmats,double b) { ModelWEig::UpdatePmatWithOutSharedMatrix(*pMatrix,param[SSRFCodonSubMod::blenMult]->val*b); } void SSRFCodonSubMod::UpdatePmatWithOutSharedMatrix(double **pmats,double b,int onlycol) { ModelWEig::UpdatePmatWithOutSharedMatrix(*pMatrix,param[SSRFCodonSubMod::blenMult]->val*b,onlycol); } FreqParamGroup *SSRFCodonSubMod::GetFreqParamGroup(int i) { assert(i < nfreqParamGroups); if (i) return aaFreqs; return baseFreqs; } int SSRFCodonSubMod::GetEncodingType() { if (geneticCode == GenCode(Mito)) return EncodingType(MitoCodons); assert(geneticCode == GenCode(Nuclear)); return EncodingType(NucCodons); } int SSRFCodonSubMod::GetNStates() { return nstates; } void SSRFCodonSubMod::AlertSharedMemory() { if (eigencalc) { #ifdef CONDENSE_MATRICES #ifdef PRESUMMING_CONSTS if (numberOfLastnonZero < nstates-1)//DAnger do this better CalculateEigInvEigMultAndPreSum(*REigenVector,InvEigenVector,nstates,EigInvEigMat,preSummed,numberOfLastnonZero); else #endif CalculateAndCondenseEigInvEigMult(*REigenVector,InvEigenVector,nstates,EigInvEigMat); #else #ifdef PRESUMMING_CONSTS if (numberOfLastnonZero < nstates-1)//DAnger do this better {CalculateReorderedEigInvEigMult(*REigenVector,InvEigenVector,nstates,EigInvEigMat); DoPreSummation(EigInvEigMat,*preSummed,numberOfLastnonZero,nstates); } else #endif CalculateGlobalEigInvEigMult(*REigenVector,InvEigenVector,nstates,EigInvEigMat); #endif } } double SSRFCodonSubMod::GetMultiplier() const { return overflowMultiplier; } void SSRFCodonSubMod::SetMultiplier(double x) { overflowMultiplier=x; } void SSRFCodonSubMod::ParameterHasChanged(FreqParamGroup *p) {eigencalc=qmatcalc=false; //cout<<"In OverLoaded SSRFCodonSubMod::ParameterHasCHanged(FreqParamGroup*)\n"; if (baseFreqs == p) mutParamsDirty=true; } void SSRFCodonSubMod::FreqParamChangesShouldSumToOne(FreqParamGroup *p) {eigencalc=qmatcalc=false; //cout<<"In OverLoaded SSRFCodonSubMod::ParameterHasCHanged(FreqParamGroup*)\n"; if (baseFreqs == p) mutParamsDirty=true; #ifdef ELIMINATE_ALL_ZEROS else {int i;bool needToResize=false; assert(p == aaFreqs); aaFreqs->ForceToSumToOne(MINIMUMAAFREQABOVEZERO); for (i=0;i < nPossAAs;i++) if (param[SSRFCodonSubMod::fAAs+i]->val < MINIMUMAAFREQABOVEZERO) needToResize=true; for (;i < maxNPossAAs;i++) if (param[SSRFCodonSubMod::fAAs+i]->val>0.0) needToResize=true; if (needToResize) {cout<<"About To Resize the Model\n"; ResizeModel(); throw NeedToRecodeException(); } } #endif } void SSRFCodonSubMod::ParameterHasChanged(Parameter *p) {if(p == gammashape) CalculateRates(); else {if(p!=pinv && p!=param[SSRFCodonSubMod::blenMult]) eigencalc=qmatcalc=false; #ifdef ALLOWMULTIHITS for (int i=SSRFCodonSubMod::pMultHit;((!mutParamsDirty) && i < SSRFCodonSubMod::fAAs);i++) #else for (int i=SSRFCodonSubMod::freqA;((!mutParamsDirty) && i < SSRFCodonSubMod::fAAs);i++) #endif if (p == param[i]) mutParamsDirty=true; #ifdef ELIMINATE_ALL_ZEROS //check to see if need to resize and recode if (p->GetName() == "Amino") {if(p->val>0.0) for (int i=0;i < nstates;i++) if (param[SSRFCodonSubMod::fAAs+i] == p) return; ResizeModel(); throw NeedToRecodeException(); } #endif } } #ifdef ELIMINATE_ALL_ZEROS int SSRFCodonSubMod::GetMaxNStates() { return maxNStates; } int *SSRFCodonSubMod::GetOrigLocToGlob() { return maxcodonLocToGlob; } int *SSRFCodonSubMod::GetCurrentGlobToLoc() { return codonGlobToLoc; } int *SSRFCodonSubMod::GetCurrentLocToGlob() { return codonLocToGlob; } #endif double **SSRFCodonSubMod::GetStateFreqs() { return codFreqs; } int SSRFCodonSubMod::NumShortsPerCharacter() { return (1+(int)floor(((double)nstates-.1)/16.0)); }//written to allow odd coding of SSRFCodonSubModel which overrides int SSRFCodonSubMod::NumStatesInLastShort() { return 1+((nstates-1)%16); }//written to allow odd coding of SSRFCodonSubModel which overrides void bull::CreateSetOfSSRFCodonSubMods(int nSitesInWholeProtein,double *gtrp/* at least 8 doubles long */,double **setofSSRFparams/* full size=nm*21 */,SSRFCodonSubMod **arrayOfMods,double *multipliers,double treesc,int gcode) { double *vecPreAlloc; #ifdef NO_STOP_CODONS if (gcode == GenCode(Mito)) vecPreAlloc=new double [216000];//60^3 else {assert(gcode == GenCode(Nuclear)); vecPreAlloc=new double [226981];//61^3 } #else vecPreAlloc=new double [262144];//64^3 #endif Parameter **param; param=new Parameter *[SSRFCodonSubMod::rGT+2]; param[SSRFCodonSubMod::blenMult]=new PositiveParameter(treesc,par(MIN)|par(CUR),1.0); #ifdef ALLOWMULTIHITS param[SSRFCodonSubMod::pMultHit]=new PositiveParameter(gtrp[9],par(MIN)|par(CUR),0.0); #endif param[SSRFCodonSubMod::freqA]=new FullParameter(gtrp[0],bull::kSmallDbl,1.0-bull::kSmallDbl,par(MIN)|par(MAX)|par(CUR),SSRFCodonSubMod::defFreqA,"freqA"); param[SSRFCodonSubMod::freqC]=new FullParameter(gtrp[1],bull::kSmallDbl,1.0-bull::kSmallDbl,par(MIN)|par(MAX)|par(CUR),SSRFCodonSubMod::defFreqC,"freqC"); param[SSRFCodonSubMod::freqG]=new FullParameter(gtrp[2],bull::kSmallDbl,1.0-bull::kSmallDbl,par(MIN)|par(MAX)|par(CUR),SSRFCodonSubMod::defFreqG,"freqG"); param[SSRFCodonSubMod::freqT]=new FullParameter(1.0-gtrp[0]-gtrp[1]-gtrp[2],bull::kSmallDbl,1.0-bull::kSmallDbl,par(MIN)|par(MAX)|par(CUR),1.0-SSRFCodonSubMod::defFreqA-SSRFCodonSubMod::defFreqC-SSRFCodonSubMod::defFreqG,"freqT"); param[SSRFCodonSubMod::rAC]=new PositiveParameter(gtrp[3],par(MIN)|par(CUR),SSRFCodonSubMod::defrAC); param[SSRFCodonSubMod::rAG]=new PositiveParameter(gtrp[4],par(MIN)|par(CUR),SSRFCodonSubMod::defrAG); param[SSRFCodonSubMod::rAT]=new PositiveParameter(gtrp[5],par(MIN)|par(CUR),SSRFCodonSubMod::defrAT); param[SSRFCodonSubMod::rCG]=new PositiveParameter(gtrp[6],par(MIN)|par(CUR),SSRFCodonSubMod::defrCG); param[SSRFCodonSubMod::rCT]=new PositiveParameter(gtrp[7],par(MIN)|par(CUR),SSRFCodonSubMod::defrCT); param[SSRFCodonSubMod::rGT]=new PositiveParameter(gtrp[8],par(MIN)|par(CUR),SSRFCodonSubMod::defrGT); param[SSRFCodonSubMod::rGT+1]=new PositiveParameter(0.0,par(MIN)|par(CUR),1.0);//the branch length interpreter FreqParamGroup *stateFreqs=new FreqParamGroup(4,(param+SSRFCodonSubMod::freqA)); SSRFCodonSubMod::geneticCode=gcode; SetCodeRelatedGlobals(gcode); SSRFCodonSubMod::mutParamsDirty=true; /*double ***nucRateMat; nucRateMat=new double **[4]; nucRateMat[0]=new double *[4]; nucRateMat[1]=new double *[4]; nucRateMat[2]=new double *[4]; nucRateMat[3]=new double *[4];*/ nucRateMat[0][0]=nucRateMat[1][1]=nucRateMat[2][2]=nucRateMat[3][3]=NULL; nucRateMat[0][1]=nucRateMat[1][0]=&(param[SSRFCodonSubMod::rAC]->val); nucRateMat[0][2]=nucRateMat[2][0]=&(param[SSRFCodonSubMod::rAG]->val); nucRateMat[0][3]=nucRateMat[3][0]=&(param[SSRFCodonSubMod::rAT]->val); nucRateMat[1][2]=nucRateMat[2][1]=&(param[SSRFCodonSubMod::rCG]->val); nucRateMat[1][3]=nucRateMat[3][1]=&(param[SSRFCodonSubMod::rCT]->val); nucRateMat[2][3]=nucRateMat[3][2]=&(param[SSRFCodonSubMod::rGT]->val); /*baseFreqMat=new double[4];*/ baseFreqMat[0]=&(param[SSRFCodonSubMod::freqA]->val); baseFreqMat[1]=&(param[SSRFCodonSubMod::freqC]->val); baseFreqMat[2]=&(param[SSRFCodonSubMod::freqG]->val); baseFreqMat[3]=&(param[SSRFCodonSubMod::freqT]->val); double **mm; mm=psdmatrix(64); bool codOfObsAA[64],possAAs[21]; for (int i=0;i < nSitesInWholeProtein;i++) { double tot=0.0; for (int j=0;j < 20;j++) tot+=setofSSRFparams[i][j]; for (int j=0;j < 64;j++) { if (CodNum[j]<20) #ifdef TESTINGINFERENCE codOfObsAA[j]=possAAs[CodNum[j]]=true;//no packing at all if comparing two different models for the same site (in case on is zero for a state that the other has) #else if (setofSSRFparams[i][CodNum[j]]>0.0) {codOfObsAA[j]=possAAs[CodNum[j]]=true; } else {codOfObsAA[j]=possAAs[CodNum[j]]=false;} #endif } #ifdef NO_STOP_CODONS int rounds=0; while (fabs(tot-1.0)>bull::kSmallDbl) {if(fabs(tot-1.0)>.00001 || rounds>2) throw BadParams("Amino Acid Frequencies don't add up to 1.0"); else {for (int j=0;j < 20;j++) setofSSRFparams[i][j]/=tot; tot=0.0; for (int j=0;j < 20;j++) tot+=setofSSRFparams[i][j]; } rounds++; } possAAs[20]=false; codOfObsAA[CodByaa[20][0]]=codOfObsAA[CodByaa[20][1]]=codOfObsAA[CodByaa[20][2]]=codOfObsAA[CodByaa[20][3]]=false; #else if (fabs(1.0-tot)SetMultiplier(*multipliers++); } } void SSRFCodonSubMod::SetBlenMultBasedOnSBLI(void) { param[SSRFCodonSubMod::blenMult]->val=(3.0/sharedBrLenInterpreter->val); } void SSRFCodonSubMod::Summarize(std::ofstream &outpf) { ResizeModel(); CalculateQ(); outpf<val>0.0) {bool fourfolddeg=true; globFromcod=codonLocToGlob[row]; currFromFirBase=tfirbase[globFromcod]; currFromSecBase=tsecbase[globFromcod]; currFromThiBase=tthibase[globFromcod]; for (currToBase=0;currToBase < 4;currToBase++)//try mutating first base if (currToBase!=currFromFirBase) //only check if it is a mutant {globTocod=16*currToBase + 4*currFromSecBase + currFromThiBase; locTocod=codonGlobToLoc[globTocod]; if (locTocod!=-1) //if amino acid could have freq >0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; temprOn+=qMatrix[row][locTocod]; if (locToAA == locFromAA) //same amino acid tempSynR+=qMatrix[row][locTocod]; else tempNonSynR+=qMatrix[row][locTocod]; } } for (currToBase=0;currToBase < 4;currToBase++)//try mutating second base if (currToBase!=currFromSecBase) //only check if it is a mutant {globTocod=16*currFromFirBase + 4*currToBase + currFromThiBase; locTocod=codonGlobToLoc[globTocod]; if (locTocod!=-1) //if amino acid could have freq >0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; temprTw+=qMatrix[row][locTocod]; if (locToAA == locFromAA) //same amino acid tempSynR+=qMatrix[row][locTocod]; else tempNonSynR+=qMatrix[row][locTocod]; } } for (currToBase=0;currToBase < 4;currToBase++)//try mutating third base if (currToBase!=currFromThiBase) //only check if it is a mutant {globTocod=16*currFromFirBase + 4*currFromSecBase + currToBase; locTocod=codonGlobToLoc[globTocod]; if (locTocod!=-1) //if amino acid could have freq >0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; temprTh+=qMatrix[row][locTocod]; if (locToAA == locFromAA) //same amino acid tempSynR+=qMatrix[row][locTocod]; else {fourfolddeg=false; tempNonSynR+=qMatrix[row][locTocod]; } } } rateOne+=temprOn*(*codFreqs[row]); rateTwo+=temprTw*(*codFreqs[row]); rateThree+=temprTh*(*codFreqs[row]); synR+=tempSynR*(*codFreqs[row]); nonSynR+=tempNonSynR*(*codFreqs[row]); if (fourfolddeg) pctTimeGTR+=(*codFreqs[row]); } } double varrateOne,varrateTwo,varrateThree,varsynR,varnonSynR; varrateOne=varrateTwo=varrateThree=varsynR=varnonSynR=0.0; #ifdef ALLOWMULTIHITS assert(0);//unwritten #endif for (int row=0;row < nstates;row++) {locFromAA=codLocToAminoAcidLoc[row]; double vartemprOn,vartemprTw,vartemprTh,vartempSynR,vartempNonSynR; vartemprOn=vartemprTw=vartemprTh=vartempSynR=vartempNonSynR=0.0; if (param[SSRFCodonSubMod::fAAs+locFromAA]->val>0.0) {bool fourfolddeg=true; globFromcod=codonLocToGlob[row]; currFromFirBase=tfirbase[globFromcod]; currFromSecBase=tsecbase[globFromcod]; currFromThiBase=tthibase[globFromcod]; for (currToBase=0;currToBase < 4;currToBase++)//try mutating first base if (currToBase!=currFromFirBase) //only check if it is a mutant {globTocod=16*currToBase + 4*currFromSecBase + currFromThiBase; locTocod=codonGlobToLoc[globTocod]; if (locTocod!=-1) //if amino acid could have freq >0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; vartemprOn+=qMatrix[row][locTocod]; if (locToAA == locFromAA) //same amino acid vartempSynR+=qMatrix[row][locTocod]; else vartempNonSynR+=qMatrix[row][locTocod]; } } for (currToBase=0;currToBase < 4;currToBase++)//try mutating second base if (currToBase!=currFromSecBase) //only check if it is a mutant {globTocod=16*currFromFirBase + 4*currToBase + currFromThiBase; locTocod=codonGlobToLoc[globTocod]; if (locTocod!=-1) //if amino acid could have freq >0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; vartemprTw+=qMatrix[row][locTocod]; if (locToAA == locFromAA) //same amino acid vartempSynR+=qMatrix[row][locTocod]; else vartempNonSynR+=qMatrix[row][locTocod]; } } for (currToBase=0;currToBase < 4;currToBase++)//try mutating third base if (currToBase!=currFromThiBase) //only check if it is a mutant {globTocod=16*currFromFirBase + 4*currFromSecBase + currToBase; locTocod=codonGlobToLoc[globTocod]; if (locTocod!=-1) //if amino acid could have freq >0.0 {locToAA=codLocToAminoAcidLoc[locTocod]; vartemprTh+=qMatrix[row][locTocod]; if (locToAA == locFromAA) //same amino acid vartempSynR+=qMatrix[row][locTocod]; else {fourfolddeg=false; vartempNonSynR+=qMatrix[row][locTocod]; } } } varrateOne+=(vartemprOn-rateOne)*(vartemprOn-rateOne)*(*codFreqs[row]); varrateTwo+=(vartemprTw-rateTwo)*(vartemprTw-rateTwo)*(*codFreqs[row]); varrateThree+=(vartemprTh-rateThree)*(vartemprTh-rateThree)*(*codFreqs[row]); varsynR+=(vartempSynR-synR)*(vartempSynR-synR)*(*codFreqs[row]); varnonSynR+=(vartempNonSynR-nonSynR)*(vartempNonSynR-nonSynR)*(*codFreqs[row]); } } outpf<highesEF) {highesEF=*codFreqs[i]; highIndex=i; } bool onOptPeak[64]; for (int i=0;i < nstates;i++) onOptPeak[i]=false; onOptPeak[highIndex]=true; std::vector < int> indecesOnOptPeak; indecesOnOptPeak.push_back(highIndex); for (int alreadyOnPeak=0;alreadyOnPeak < indecesOnOptPeak.size();alreadyOnPeak++) {//go through all of the mutational neighbors and add them if their eqFreq is lower for (int i=0;i < 9;i++) {int neighborstate=GetIndexOfMutationalNeighbor(indecesOnOptPeak[alreadyOnPeak],i); if (neighborstate >= 0) {//state has nonzerofreq if (*codFreqs[neighborstate]<*codFreqs[indecesOnOptPeak[alreadyOnPeak]]) {//neighbor is lower on hill indecesOnOptPeak.push_back(neighborstate); onOptPeak[neighborstate]=true; } } } } double subOptPro=0.0; for (int i=0;i < nstates;i++) if (!onOptPeak[i]) subOptPro+=*codFreqs[i]; return subOptPro; } int SSRFCodonSubMod::GetIndexOfMutationalNeighbor(int inIndex,int numOfMutNeighbor) { int startingCod,endingCod; startingCod=codonLocToGlob[inIndex]; endingCod=codonGlobToLoc[GetGlobalIndexOfMutationalNeighbor(startingCod,numOfMutNeighbor)]; return endingCod; } void bull::ModifyBlenMultSoBranchesAreScaled(SSRFCodonSubMod **modArr,int ns) { //i've been sloppy about calling CalculateQ, so make sure all of the models are updated for (int i=0;i < ns;i++) modArr[i]->CalculateQ(); //assumes that all of the models share the same blenMult and sharedBrLenInterpreter (*modArr)->SetBlenMultBasedOnSBLI(); /* ofstream modDes("ModelDescription"); for (int i=0;i < ns;i++) modArr[i]->Summarize(modDes); modDes.close(); */ } int bull::GetNumberOfPossibleAminoAcids(bool *codOfObsAA,bool *possAAs) { int naas=0; for (int i=0;i < 21;i++) if (possAAs[i]) naas++; int prevnaas=-1; while (prevnaas!=naas) {prevnaas=naas; int codn=0; for (int fb=0;fb < 4;fb++) for (int sb=0;sb < 4;sb++) for (int tb=0;tb < 4;tb++) { #ifdef NO_STOP_CODONS if (!codOfObsAA[codn] && CodNum[codn]!=20) {bool mfir=false; for (int mutatedbase=0;(!mfir && mutatedbase < 4);mutatedbase++) if (codOfObsAA[16*mutatedbase+4*sb+tb] ) mfir=true; bool msec=false; for (int mutatedbase=0;(!msec && mutatedbase < 4);mutatedbase++) if (codOfObsAA[16*fb+4*mutatedbase+tb] ) msec=true; bool mthi=false; for (int mutatedbase=0;(!mthi && mutatedbase < 4);mutatedbase++) if (codOfObsAA[16*fb+4*sb+mutatedbase] ) mthi=true; if ((mfir&&msec) ||(mthi&&msec) ||(mfir&&mthi)) {naas++; possAAs[CodNum[codn]]=true; for (int k=0;k < 64;k++) if (CodNum[codn] == CodNum[k]) codOfObsAA[k]=true; } } #else if (!codOfObsAA[codn]) {bool mfir=false; for (int mutatedbase=0;(!mfir && mutatedbase < 4);mutatedbase++) if (codOfObsAA[16*mutatedbase+4*sb+tb]) mfir=true; bool msec=false; for (int mutatedbase=0;(!msec && mutatedbase < 4);mutatedbase++) if (codOfObsAA[16*fb+4*mutatedbase+tb]) msec=true; bool mthi=false; for (int mutatedbase=0;(!mthi && mutatedbase < 4);mutatedbase++) if (codOfObsAA[16*fb+4*sb+mutatedbase]) mthi=true; if ((mfir&&msec) ||(mthi&&msec) ||(mfir&&mthi)) {naas++; possAAs[CodNum[codn]]=true; for (int k=0;k < 64;k++) if (CodNum[codn] == CodNum[k]) codOfObsAA[k]=true; } } #endif codn++; } } return naas; } int bull::GetNumberOfPossibleCodons(bool *possAAs) { int nc=0; for (int i=0;i < 21;i++) if (possAAs[i]) nc+=NCodByAA[i]; return nc; } void SSRFCodonSubMod::AddAminoAcidFreqs(std::ofstream &dest) { //for printing out amino acid freqs in log file double outpaaf[20]; for (int i=0;i < 20;i++) outpaaf[i]=0.0; for (int i=0;i < nPossAAs;i++) if (aminoAcidLocToGlob[i]<20) outpaaf[aminoAcidLocToGlob[i]]=param[SSRFCodonSubMod::fAAs+i]->val; for (int i=0;i < 20;i++) {dest<= 0 && numOfMutNeighbor < 9); //takes and returns global codon num int mutbase=0,basesMoved=-1; int retval; if (numOfMutNeighbor < 3) //mutating third {while (basesMoved < numOfMutNeighbor) {if(mutbase!=tthibase[inIndex]) basesMoved++; mutbase++; } mutbase--; retval=(16*tfirbase[inIndex]+4*tsecbase[inIndex]+mutbase); } else if (numOfMutNeighbor < 6) //mutating second {numOfMutNeighbor-=3; while (basesMoved < numOfMutNeighbor) {if(mutbase!=tsecbase[inIndex]) basesMoved++; mutbase++; } mutbase--; retval=(16*tfirbase[inIndex]+4*mutbase+tthibase[inIndex]); } else //mutating first {numOfMutNeighbor-=6; while (basesMoved < numOfMutNeighbor) {if(mutbase!=tfirbase[inIndex]) basesMoved++; mutbase++; } mutbase--; retval=16*mutbase+4*tsecbase[inIndex]+tthibase[inIndex]; } return retval; }