16.6 Test the New Updaters

(Linux version)

< 16.5 | 16.6 | 17.0 >

Add the new updaters to Chain::createUpdaters

In order to use the new Updater-derived classes, they must be added to the Chain class _updaters vector. This is done in Chain::createUpdaters.

    inline unsigned Chain::createUpdaters(Model::SharedPtr model, Lot::SharedPtr lot, Likelihood::SharedPtr likelihood) { 
        _model = model;
        _lot = lot;
        _updaters.clear();

        double wstd             = 1.0;
        double sum_weights      = 0.0;
        
        // Add state frequency parameter updaters to _updaters
        Model::state_freq_params_t & statefreq_shptr_vect = _model->getStateFreqParams();
        for (auto statefreq_shptr : statefreq_shptr_vect) {
            Updater::SharedPtr u = StateFreqUpdater::SharedPtr(new StateFreqUpdater(statefreq_shptr));
            u->setLikelihood(likelihood);
            u->setLot(lot);
            u->setLambda(1.0);
            u->setTargetAcceptanceRate(0.3);
            u->setPriorParameters(std::vector<double>(statefreq_shptr->getStateFreqsSharedPtr()->size(), 1.0));
            u->setWeight(wstd); sum_weights += wstd;
            _updaters.push_back(u);
        }

        // Add exchangeability parameter updaters to _updaters
        Model::exchangeability_params_t & exchangeability_shptr_vect = _model->getExchangeabilityParams();
        for (auto exchangeability_shptr : exchangeability_shptr_vect) {
            Updater::SharedPtr u = ExchangeabilityUpdater::SharedPtr(new ExchangeabilityUpdater(exchangeability_shptr));
            u->setLikelihood(likelihood);
            u->setLot(lot);
            u->setLambda(1.0);
            u->setTargetAcceptanceRate(0.3);
            u->setPriorParameters({1.0, 1.0, 1.0, 1.0, 1.0, 1.0});
            u->setWeight(wstd); sum_weights += wstd;
            _updaters.push_back(u);
        }

        // Add rate variance parameter updaters to _updaters
        Model::ratevar_params_t & ratevar_shptr_vect = _model->getRateVarParams();
        for (auto ratevar_shptr : ratevar_shptr_vect) {
            Updater::SharedPtr u = GammaRateVarUpdater::SharedPtr(new GammaRateVarUpdater(ratevar_shptr));
            u->setLikelihood(likelihood);
            u->setLot(lot);
            u->setLambda(1.0);
            u->setTargetAcceptanceRate(0.3);
            u->setPriorParameters({1.0, 1.0});
            u->setWeight(wstd); sum_weights += wstd;
            _updaters.push_back(u);
        }
        
        // Add pinvar parameter updaters to _updaters
        Model::pinvar_params_t & pinvar_shptr_vect = _model->getPinvarParams();
        for (auto pinvar_shptr : pinvar_shptr_vect) {
            Updater::SharedPtr u = PinvarUpdater::SharedPtr(new PinvarUpdater(pinvar_shptr));
            u->setLikelihood(likelihood);
            u->setLot(lot);
            u->setLambda(0.5);
            u->setTargetAcceptanceRate(0.3);
            u->setPriorParameters({1.0, 1.0});
            u->setWeight(wstd); sum_weights += wstd;
            _updaters.push_back(u);
        }
        
        // Add omega parameter updaters to _updaters
        Model::omega_params_t & omega_shptr_vect = _model->getOmegaParams();
        for (auto omega_shptr : omega_shptr_vect) {
            Updater::SharedPtr u = OmegaUpdater::SharedPtr(new OmegaUpdater(omega_shptr));
            u->setLikelihood(likelihood);
            u->setLot(lot);
            u->setLambda(1.0);
            u->setTargetAcceptanceRate(0.3);
            u->setPriorParameters({1.0, 1.0});
            u->setWeight(wstd); sum_weights += wstd;
            _updaters.push_back(u);
        }
        
        // Add subset relative rate parameter updater to _updaters
        if (!_model->isFixedSubsetRelRates()) {
            Updater::SharedPtr u = SubsetRelRateUpdater::SharedPtr(new SubsetRelRateUpdater(_model));
            u->setLikelihood(likelihood);
            u->setLot(lot);
            u->setLambda(1.0);
            u->setTargetAcceptanceRate(0.3);
            u->setPriorParameters(std::vector<double>(_model->getNumSubsets(), 1.0));
            u->setWeight(wstd); sum_weights += wstd;
            _updaters.push_back(u);
        }
                
        for (auto u : _updaters) {
            u->calcProb(sum_weights);
        }
        
        return (unsigned)_updaters.size();
    } 

Be sure to add the appropriate headers to the top of the chain.hpp file:

#pragma once    

#include <memory>
#include <boost/format.hpp>
#include "lot.hpp"
#include "data.hpp"
#include "tree.hpp"
#include "model.hpp"
#include "likelihood.hpp"
#include "tree_manip.hpp"
#include "updater.hpp"
#include "gamma_ratevar_updater.hpp"
#include "omega_updater.hpp"
#include "pinvar_updater.hpp"
#include "statefreq_updater.hpp"
#include "exchangeability_updater.hpp"
#include "subset_relrate_updater.hpp"

Run strom

Nucleotide model analysis

Copy the following into your strom.conf file. This tests all updaters except for OmegaUpdater (we will test that one separately below).

datafile    = rbcl10.nex    
treefile    = rbcl10.tre

subset      = first:1-1314\3
subset      = second:2-1314\3
subset      = third:3-1314\3

statefreq   = default:0.294667,0.189172,0.206055,0.310106
rmatrix     = default:0.06082,0.27887,0.06461,0.06244,0.48492,0.04834
ratevar     = default:1.0
ratevar     = third:1.0
pinvar      = default:0.1
pinvar      = third:0.1
ncateg      = default:4
relrate     = default:1,2,3
niter       = 10000
samplefreq  = 10
printfreq   = 100
expectedLnL = -6676.396     

Run strom and verify that all model parameters are being updated (except tree topology and edge lengths) by viewing the params.txt file in the program Tracer. You should find that rAC-0, rAC-1, and rAC-2 all have identical values at every iteration. This is because we specified only one set of starting values for rmatrix and assigned these to default, which means that one set of exchangeabilities applies to all subsets. In other words, we linked exchangeabilities across subsets.

On the other hand, we allowed the third codon position subset to have its own ratevar and pinvar parameters. so you should find that pinvar-0 and pinvar-1 are always identical but pinvar-2 takes a different trajectory (and ratevar-0 and ratevar-1 are identical while ratevar-2 is different).

Our program treats subset relative rates as a single multivariate parameter, so you can either fix these or allow them to vary, but if you allow them to vary, they all will vary. Note that the rate of 3rd codon position sites is more than 7 times higher than the rate of 1st position sites and more than 8 times higher than 2nd position sites, which makes sense given that rbcL is a strongly conserved protein coding gene that allows few nonsynonynous substitutions.

Codon model analysis

To estimate the nonsynonymous/synonymous rate ratio directly, specifying a single codon subset rather than 3 nucleotide subsets in your strom.conf file as shown below:

datafile    = rbcl10.nex    
treefile    = rbcl10.tre

subset      = default[codon,standard]:1-438

statefreq   = default:equal
omega       = default:0.05
ncateg      = default:1
pinvar      = default:[0.0]
niter       = 10000 
samplefreq  = 10
printfreq   = 1000
expectedLnL = -6301.68752     

The codon model will take much longer than the nucleotide analysis - this is typical unless your BeagleLib is able to take advantage of a GPU resource, which speeds up codon models proportionately more than nucleotide models due to the difference in the number of states (61 for the codon model vs. 4 for the nucleotide models).

Because the codon model is so slow, you may wish to create a release version of the program that is optimized for speed and use that executable to run the codon example. To do this, add buildtype=release to the default options in your meson.build file, as shown below:

project('strom', 'cpp',
	default_options : ['cpp_std=c++11','buildtype=release','prefix=/home/paul/Documents/strom/distr'],
	version : '1.0')
cpp = meson.get_compiler('cpp')
lib_filesystem = cpp.find_library('boost_filesystem', dirs: ['/home/paul/lib/static'], required: true)
lib_program_options = cpp.find_library('boost_program_options', dirs: ['/home/paul/lib/static'], required: true)
lib_ncl = cpp.find_library('ncl', dirs: ['/home/paul/lib/static'], required: true)
lib_beagle = cpp.find_library('hmsbeagle', dirs: ['/home/paul/lib'], required: true)
incl_beagle = include_directories('/home/paul/include/libhmsbeagle-1')
incl_ncl = include_directories('/home/paul/include')
incl_boost = include_directories('/home/paul/Documents/libraries/boost_1_71_0')
incl_eigen = include_directories('/home/paul/Documents/libraries/eigen-eigen-323c052e1731')
executable('strom', 'main.cpp', install: true, install_dir: '.', dependencies: [lib_beagle,lib_ncl,lib_program_options,lib_filesystem], include_directories: [incl_beagle,incl_ncl,incl_boost,incl_eigen])
install_data('strom.conf', install_dir: '.')
install_data('rbcl10.nex', install_dir: '.')
install_data('rbcl10.tre', install_dir: '.')
install_data('rbcl738.nex', install_dir: '.')
install_data('rbcl738nj.tre', install_dir: '.')
install_data('test.tre', install_dir: '.')
install_data('rbcL.nex', install_dir: '.')
install_data('rbcLjc.tre', install_dir: '.')
install_data('go.sh', install_dir: '.')
install_data('datatest.nex', install_dir: '.')

The options possible for the default_options list can be found at the built-in options section of the Meson web site.

You should find that omega has a posterior mean of about 0.02 to 0.03. This small nonsynonymous/synonymous rate ratio implies that this gene is under strong stabilizing selection and mutations that change the amino acid in the corresponding protein are rarely preserved compared to mutations that have no effect on the protein primary structure.

< 16.5 | 16.6 | 17.0 >