18.1 Modify the Strom Class

(Win version)

< 18.0 | 18.1 | 18.2 >

All of the modifications needed to switch to a multichain version of our program are confined to the Strom class. The changes needed are relatively minor, but numerous, because each chain needs to have its own Likelihood and Model objects (otherwise all chains would update the exact same model parameter values).

The Strom class declaration

Begin by making the indicated changes and additions in the Strom class declaration (in the file strom.hpp). Note that some highlighted functions are already in your file, but have been changed, so look carefully to find what’s different!

#pragma once    

#include &lt;iostream&gt;
#include "data.hpp" 
#include "likelihood.hpp"
#include "tree_summary.hpp"
#include "partition.hpp"
#include "lot.hpp"
#include "chain.hpp"
#include "output_manager.hpp"
#include &lt;boost/program_options.hpp&gt;
#include &lt;boost/filesystem.hpp&gt;
#include &lt;boost/algorithm/string/split.hpp&gt;
#include &lt;boost/algorithm/string/classification.hpp&gt;

namespace strom {

    class Strom {
        public:
                                                    Strom();
                                                    ~Strom();

            void                                    clear();
            void                                    processCommandLineOptions(int argc, const char * argv[]);
            void                                    run();
        
        private:
            <span style="color:#0000ff"><strong>bool                                    processAssignmentString(Model::SharedPtr m, const std::string & which, const std::string & definition);</strong></span>
            <span style="color:#0000ff"><strong>void                                    handleAssignmentStrings(Model::SharedPtr m, const boost::program_options::variables_map & vm, std::string label, const std::vector&lt;std::string&gt; & definitions, std::string default_definition);</strong></span>
            bool                                    splitAssignmentString(const std::string & definition, std::vector&lt;std::string&gt; & vector_of_subset_names, std::vector&lt;double&gt;  & vector_of_values);
            void                                    sample(unsigned iter, Chain & chain);

            <span style="color:#0000ff"><strong>void                                    readData();</strong></span>
            <span style="color:#0000ff"><strong>void                                    readTrees();</strong></span>
            <span style="color:#0000ff"><strong>void                                    showPartitionInfo();</strong></span>
            <span style="color:#0000ff"><strong>void                                    showBeagleInfo();</strong></span>
            <span style="color:#0000ff"><strong>void                                    showMCMCInfo();</strong></span>
            <span style="color:#0000ff"><strong>void                                    calcHeatingPowers();</strong></span>
            <span style="color:#0000ff"><strong>void                                    initChains();</strong></span>
            <span style="color:#0000ff"><strong>void                                    startTuningChains();</strong></span>
            <span style="color:#0000ff"><strong>void                                    stopTuningChains();</strong></span>
            <span style="color:#0000ff"><strong>void                                    stepChains(unsigned iteration, bool sampling);</strong></span>
            <span style="color:#0000ff"><strong>void                                    swapChains();</strong></span>
            <span style="color:#0000ff"><strong>void                                    stopChains();</strong></span>
            <span style="color:#0000ff"><strong>void                                    swapSummary() const;</strong></span>
            <span style="color:#0000ff"><strong>void                                    showChainTuningInfo() const;</strong></span>

            double                                  _expected_log_likelihood;

            std::string                             _data_file_name;
            std::string                             _tree_file_name;
            Partition::SharedPtr                    _partition;

            Data::SharedPtr                         _data;
            <span style="color:#0000ff"><strong>std::vector&lt;Likelihood::SharedPtr&gt;      _likelihoods;</strong></span>
            <span style="color:#0000ff"><strong>//Model::SharedPtr                        _model;</strong></span>
            <span style="color:#0000ff"><strong>//Likelihood::SharedPtr                   _likelihood;</strong></span>
            TreeSummary::SharedPtr                  _tree_summary;
            Lot::SharedPtr                          _lot;

            unsigned                                _random_seed;
            unsigned                                _num_iter;
            unsigned                                _print_freq;
            unsigned                                _sample_freq;

            <span style="color:#0000ff"><strong>unsigned                                _num_burnin_iter;</strong></span>
            <span style="color:#0000ff"><strong>unsigned                                _num_chains;</strong></span>
            <span style="color:#0000ff"><strong>double                                  _heating_lambda;</strong></span>
            <span style="color:#0000ff"><strong>bool                                    _using_stored_data;</strong></span>
            <span style="color:#0000ff"><strong>std::vector&lt;Chain&gt;                      _chains;</strong></span>
            <span style="color:#0000ff"><strong>std::vector&lt;double&gt;                     _heating_powers;</strong></span>
            <span style="color:#0000ff"><strong>std::vector&lt;unsigned&gt;                   _swaps;</strong></span>

            bool                                    _use_gpu;
            bool                                    _ambig_missing;
            bool                                    _use_underflow_scaling;

            static std::string                      _program_name;
            static unsigned                         _major_version;
            static unsigned                         _minor_version;
            
            OutputManager::SharedPtr                _output_manager;

    };  

The processAssignmentString and handleAssignmentStrings member functions now have an additional Model::SharedPtr parameter. The _likelihood data member has been replaced by a vector of Likelihood shared pointers named _likelihoods, and _model has been deleted because the model specific to a particular Likelihood object can always be obtained from that Likelihood object using the getModel member function. The other changes are all additions of member functions or data members.

Modify the clear member function

Initialize the new additions in the clear member function.

    inline void Strom::clear() {    
        _data_file_name             = "";
        _tree_file_name             = "";
        _tree_summary               = nullptr;
        _partition.reset(new Partition());
        _use_gpu                    = true;
        _ambig_missing              = true;
        <span style="color:#0000ff"><strong>//_model.reset(new Model());</strong></span>
        _expected_log_likelihood    = 0.0;
        _data                       = nullptr;
        <span style="color:#0000ff"><strong>//_likelihood                 = nullptr;</strong></span>
        _use_underflow_scaling      = false;
        _lot                        = nullptr;
        _random_seed                = 1;
        _num_iter                   = 1000;
        _print_freq                 = 1;
        _sample_freq                = 1;
        _output_manager             = nullptr;

        <span style="color:#0000ff"><strong>_using_stored_data          = true;</strong></span>
        <span style="color:#0000ff"><strong>_likelihoods.clear();</strong></span>
        <span style="color:#0000ff"><strong>_num_burnin_iter            = 1000;</strong></span>
        <span style="color:#0000ff"><strong>_heating_lambda             = 0.5;</strong></span>
        <span style="color:#0000ff"><strong>_num_chains                 = 1;</strong></span>
        <span style="color:#0000ff"><strong>_chains.resize(0);</strong></span>
        <span style="color:#0000ff"><strong>_heating_powers.resize(0);</strong></span>
        <span style="color:#0000ff"><strong>_swaps.resize(0);</strong></span>
    }   

Add new program options

Add new program options for the number of chains to run simultaneously (nchains), the factor that determines how hot each heated chain is (heatfactor), the number of iterations to use for burn-in (burnin), and whether to explore the posterior (usedata=yes) or the prior (usedata=no).

    inline void Strom::processCommandLineOptions(int argc, const char * argv[]) {   
        std::vector&lt;std::string&gt; partition_statefreq;
        std::vector&lt;std::string&gt; partition_rmatrix;
        std::vector&lt;std::string&gt; partition_omega;
        std::vector&lt;std::string&gt; partition_ratevar;
        std::vector&lt;std::string&gt; partition_pinvar;
        std::vector&lt;std::string&gt; partition_ncateg;
        std::vector&lt;std::string&gt; partition_subsets;
        std::vector&lt;std::string&gt; partition_relrates;
        std::vector&lt;std::string&gt; partition_tree;
        boost::program_options::variables_map vm;
        boost::program_options::options_description desc("Allowed options");
        desc.add_options()
            ("help,h", "produce help message")
            ("version,v", "show program version")
            ("seed,z",        boost::program_options::value(&_random_seed)-&gt;default_value(1),   "pseudorandom number seed")
            ("niter,n",       boost::program_options::value(&_num_iter)-&gt;default_value(1000),   "number of MCMC iterations")
            ("printfreq",  boost::program_options::value(&_print_freq)-&gt;default_value(1),   "skip this many iterations before reporting progress")
            ("samplefreq",  boost::program_options::value(&_sample_freq)-&gt;default_value(1),   "skip this many iterations before sampling next")
            ("datafile,d",  boost::program_options::value(&_data_file_name)-&gt;required(), "name of a data file in NEXUS format")
            ("treefile,t",  boost::program_options::value(&_tree_file_name)-&gt;required(), "name of a tree file in NEXUS format")
            ("subset",  boost::program_options::value(&partition_subsets), "a string defining a partition subset, e.g. 'first:1-1234\3' or 'default[codon:standard]:1-3702'")
            ("ncateg,c", boost::program_options::value(&partition_ncateg), "number of categories in the discrete Gamma rate heterogeneity model")
            ("statefreq", boost::program_options::value(&partition_statefreq), "a string defining state frequencies for one or more data subsets, e.g. 'first,second:0.1,0.2,0.3,0.4'")
            ("omega", boost::program_options::value(&partition_omega), "a string defining the nonsynonymous/synonymous rate ratio omega for one or more data subsets, e.g. 'first,second:0.1'")
            ("rmatrix", boost::program_options::value(&partition_rmatrix), "a string defining the rmatrix for one or more data subsets, e.g. 'first,second:1,2,1,1,2,1'")
            ("ratevar", boost::program_options::value(&partition_ratevar), "a string defining the among-site rate variance for one or more data subsets, e.g. 'first,second:2.5'")
            ("pinvar", boost::program_options::value(&partition_pinvar), "a string defining the proportion of invariable sites for one or more data subsets, e.g. 'first,second:0.2'")
            ("relrate", boost::program_options::value(&partition_relrates), "a string defining the (unnormalized) relative rates for all data subsets (e.g. 'default:3,1,6').")
            ("tree", boost::program_options::value(&partition_tree), "the index of the tree in the tree file (first tree has index = 1)")
            ("expectedLnL", boost::program_options::value(&_expected_log_likelihood)-&gt;default_value(0.0), "log likelihood expected")
            <span style="color:#0000ff"><strong>("nchains",       boost::program_options::value(&_num_chains)-&gt;default_value(1),                "number of chains")</strong></span>
            <span style="color:#0000ff"><strong>("heatfactor",    boost::program_options::value(&_heating_lambda)-&gt;default_value(0.5),          "determines how hot the heated chains are")</strong></span>
            <span style="color:#0000ff"><strong>("burnin",        boost::program_options::value(&_num_burnin_iter)-&gt;default_value(100),         "number of iterations used to burn in chains")</strong></span>
            <span style="color:#0000ff"><strong>("usedata",       boost::program_options::value(&_using_stored_data)-&gt;default_value(true),      "use the stored data in calculating likelihoods (specify no to explore the prior)")</strong></span>
            ("gpu",           boost::program_options::value(&_use_gpu)-&gt;default_value(true),                "use GPU if available") 
            ("ambigmissing",  boost::program_options::value(&_ambig_missing)-&gt;default_value(true),          "treat all ambiguities as missing data")
            ("underflowscaling",  boost::program_options::value(&_use_underflow_scaling)-&gt;default_value(false),          "scale site-likelihoods to prevent underflow (slower but safer)")
        ;
        boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
        try {
            const boost::program_options::parsed_options & parsed = boost::program_options::parse_config_file&lt; char &gt;("strom.conf", desc, false);
            boost::program_options::store(parsed, vm);
        }
        catch(boost::program_options::reading_file & x) {
            std::cout &lt;&lt; "Note: configuration file (strom.conf) not found" &lt;&lt; std::endl;
        }
        boost::program_options::notify(vm);

        // If user specified --help on command line, output usage summary and quit
        if (vm.count("help") &gt; 0) {
            std::cout &lt;&lt; desc &lt;&lt; "\n";
            std::exit(1);
        }

        // If user specified --version on command line, output version and quit
        if (vm.count("version") &gt; 0) {
            std::cout &lt;&lt; boost::str(boost::format("This is %s version %d.%d") % _program_name % _major_version % _minor_version) &lt;&lt; std::endl;
            std::exit(1);
        }
    
        // If user specified --subset on command line, break specified partition subset 
        // definition into name and character set string and add to _partition
        if (vm.count("subset") &gt; 0) {
            _partition.reset(new Partition());
            for (auto s : partition_subsets) {
                _partition-&gt;parseSubsetDefinition(s);
            }
        }

        <span style="color:#0000ff"><strong>// Be sure number of chains is greater than or equal to 1</strong></span>
        <span style="color:#0000ff"><strong>if (_num_chains &lt; 1)</strong></span>
            <span style="color:#0000ff"><strong>throw XStrom("nchains must be a positive integer greater than 0");</strong></span>

        <span style="color:#0000ff"><strong>// Be sure heatfactor is between 0 and 1</strong></span>
        <span style="color:#0000ff"><strong>if (_heating_lambda &lt;= 0.0 || _heating_lambda &gt; 1.0)</strong></span>
            <span style="color:#0000ff"><strong>throw XStrom("heatfactor must be a real number in the interval (0.0,1.0]");</strong></span>
        
        <span style="color:#0000ff"><strong>if (!_using_stored_data)</strong></span>
            <span style="color:#0000ff"><strong>std::cout &lt;&lt; "\n*** Not using stored data (posterior = prior) ***\n" &lt;&lt; std::endl;</strong></span>

        <span style="color:#0000ff"><strong>// Allocate a separate model for each chain</strong></span>
        <span style="color:#0000ff"><strong>for (unsigned c = 0; c &lt; _num_chains; c++) {</strong></span>
            <span style="color:#0000ff"><strong>Likelihood::SharedPtr likelihood = Likelihood::SharedPtr(new Likelihood());</strong></span>
            <span style="color:#0000ff"><strong>likelihood-&gt;setPreferGPU(_use_gpu);</strong></span>
            <span style="color:#0000ff"><strong>likelihood-&gt;setAmbiguityEqualsMissing(_ambig_missing);</strong></span>
            <span style="color:#0000ff"><strong>Model::SharedPtr m = likelihood-&gt;getModel();</strong></span>
            <span style="color:#0000ff"><strong>m-&gt;setSubsetDataTypes(_partition-&gt;getSubsetDataTypes());</strong></span>
            <span style="color:#0000ff"><strong>handleAssignmentStrings(m, vm, "statefreq", partition_statefreq, "default:equal");</strong></span>
            <span style="color:#0000ff"><strong>handleAssignmentStrings(m, vm, "rmatrix",   partition_rmatrix,   "default:equal");</strong></span>
            <span style="color:#0000ff"><strong>handleAssignmentStrings(m, vm, "omega",     partition_omega,     "default:0.1"  );</strong></span>
            <span style="color:#0000ff"><strong>handleAssignmentStrings(m, vm, "ncateg",    partition_ncateg,    "default:1"    );</strong></span>
            <span style="color:#0000ff"><strong>handleAssignmentStrings(m, vm, "ratevar",   partition_ratevar,   "default:1.0"  );</strong></span>
            <span style="color:#0000ff"><strong>handleAssignmentStrings(m, vm, "pinvar",    partition_pinvar,    "default:0.0"  );</strong></span>
            <span style="color:#0000ff"><strong>handleAssignmentStrings(m, vm, "relrate",   partition_relrates,  "default:equal");</strong></span>
            <span style="color:#0000ff"><strong>handleAssignmentStrings(m, vm, "tree",      partition_tree,      "default:1");</strong></span>
            <span style="color:#0000ff"><strong>_likelihoods.push_back(likelihood);</strong></span>
        <span style="color:#0000ff"><strong>}</strong></span>
    }   

At the end of the processCommandLineOptions function, note that the section below

        _model-&gt;setSubsetDataTypes(_partition-&gt;getSubsetDataTypes());   
        
        handleAssignmentStrings(vm, "statefreq", partition_statefreq, "default:equal");
        handleAssignmentStrings(vm, "rmatrix",   partition_rmatrix,   "default:equal");
        handleAssignmentStrings(vm, "omega",     partition_omega,     "default:0.1"  );
        handleAssignmentStrings(vm, "ncateg",    partition_ncateg,    "default:1"    );
        handleAssignmentStrings(vm, "ratevar",   partition_ratevar,   "default:1.0"  );
        handleAssignmentStrings(vm, "pinvar",    partition_pinvar,    "default:0.0"  );
        handleAssignmentStrings(vm, "relrate",   partition_relrates,  "default:equal");
        handleAssignmentStrings(vm, "tree",      partition_tree,      "default:1"    ); 

has been replaced by

        // Allocate a separate model for each chain 
        for (unsigned c = 0; c &lt; _num_chains; c++) {
            Likelihood::SharedPtr likelihood = Likelihood::SharedPtr(new Likelihood());
            likelihood-&gt;setPreferGPU(_use_gpu);
            likelihood-&gt;setAmbiguityEqualsMissing(_ambig_missing);
            Model::SharedPtr m = likelihood-&gt;getModel();
            m-&gt;setSubsetDataTypes(_partition-&gt;getSubsetDataTypes());
            handleAssignmentStrings(m, vm, "statefreq", partition_statefreq, "default:equal");
            handleAssignmentStrings(m, vm, "rmatrix",   partition_rmatrix,   "default:equal");
            handleAssignmentStrings(m, vm, "omega",     partition_omega,     "default:0.1"  );
            handleAssignmentStrings(m, vm, "ncateg",    partition_ncateg,    "default:1"    );
            handleAssignmentStrings(m, vm, "ratevar",   partition_ratevar,   "default:1.0"  );
            handleAssignmentStrings(m, vm, "pinvar",    partition_pinvar,    "default:0.0"  );
            handleAssignmentStrings(m, vm, "relrate",   partition_relrates,  "default:equal");
            handleAssignmentStrings(m, vm, "tree",      partition_tree,      "default:1");
            _likelihoods.push_back(likelihood);
        }   

We now must create a separate Likelihood object for every chain and set up the model contained by each of those Likelihood objects.

Modify the handleAssignmentStrings member function

In addition to adding the Model::SharedPtr m parameter to the function, change the two lines indicated so that the model m is passed as the first parameter to the processAssignmentString function.

    <span style="color:#0000ff"><strong>inline void Strom::handleAssignmentStrings(Model::SharedPtr m, const boost::program_options::variables_map & vm, std::string label, const std::vector&lt;std::string&gt; & definitions, std::string default_definition) {</strong></span>
        if (vm.count(label) &gt; 0) {
            bool first = true;
            for (auto s : definitions) {
                <span style="color:#0000ff"><strong>bool is_default = processAssignmentString(m, label, s);</strong></span>
                if (is_default && !first)
                    throw XStrom(boost::format("default specification must be first %s encountered") % label);
                first = false;
            }
        }
        else {
            <span style="color:#0000ff"><strong>processAssignmentString(m, label, default_definition);</strong></span>
        }
    }   

Modify the processAssignmentString member function

In addition to adding the Model::SharedPtr m parameter to the function, change all instances of _model to m, as indicated, because there is no longer a single model pointed to by _model.

    <span style="color:#0000ff"><strong>inline bool Strom::processAssignmentString(Model::SharedPtr m, const std::string & which, const std::string & definition) {</strong></span>
        unsigned num_subsets_defined = _partition-&gt;getNumSubsets();
        std::vector&lt;std::string&gt; vector_of_subset_names;
        std::vector&lt;double&gt; vector_of_values;
        bool fixed = splitAssignmentString(definition, vector_of_subset_names, vector_of_values);
        
        if (vector_of_values.size() == 1 && vector_of_values[0] == -1 && !(which == "statefreq" || which == "rmatrix" || which == "relrate"))
            throw XStrom("Keyword equal is only allowed for statefreq, rmatrix, and relrate");

        // Assign values to subsets in model
        bool default_found = false;
        if (which == "statefreq") {
            QMatrix::freq_xchg_ptr_t freqs = std::make_shared&lt;QMatrix::freq_xchg_t&gt;(vector_of_values);
            if (vector_of_subset_names[0] == "default") {
                default_found = true;
                for (unsigned i = 0; i &lt; num_subsets_defined; i++)
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetStateFreqs(freqs, i, fixed);</strong></span>
            }
            else {
                for (auto s : vector_of_subset_names) {
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetStateFreqs(freqs, _partition-&gt;findSubsetByName(s), fixed);</strong></span>
                }
            }
        }
        else if (which == "rmatrix") {
            QMatrix::freq_xchg_ptr_t xchg = std::make_shared&lt;QMatrix::freq_xchg_t&gt;(vector_of_values);
            if (vector_of_subset_names[0] == "default") {
                default_found = true;
                for (unsigned i = 0; i &lt; num_subsets_defined; i++)
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetExchangeabilities(xchg, i, fixed);</strong></span>
            }
            else {
                for (auto s : vector_of_subset_names) {
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetExchangeabilities(xchg, _partition-&gt;findSubsetByName(s), fixed);</strong></span>
                }
            }
        }
        else if (which == "omega") {
            if (vector_of_values.size() &gt; 1)
                throw XStrom(boost::format("expecting 1 value for omega, found %d values") % vector_of_values.size());
            QMatrix::omega_ptr_t omega = std::make_shared&lt;QMatrix::omega_t&gt;(vector_of_values[0]);
            if (vector_of_subset_names[0] == "default") {
                default_found = true;
                for (unsigned i = 0; i &lt; num_subsets_defined; i++)
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetOmega(omega, i, fixed);</strong></span>
            }
            else {
                for (auto s : vector_of_subset_names) {
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetOmega(omega, _partition-&gt;findSubsetByName(s), fixed);</strong></span>
                }
            }
        }
        else if (which == "pinvar") {
            if (vector_of_values.size() &gt; 1)
                throw XStrom(boost::format("expecting 1 value for pinvar, found %d values") % vector_of_values.size());
            ASRV::pinvar_ptr_t p = std::make_shared&lt;double&gt;(vector_of_values[0]);
            bool invar_model = (*p &gt; 0);
            if (vector_of_subset_names[0] == "default") {
                default_found = true;
                for (unsigned i = 0; i &lt; num_subsets_defined; i++) {
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetIsInvarModel(invar_model, i);</strong></span>
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetPinvar(p, i, fixed);</strong></span>
                }
            }
            else {
                for (auto s : vector_of_subset_names) {
                    unsigned i = _partition-&gt;findSubsetByName(s);
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetIsInvarModel(invar_model, i);</strong></span>
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetPinvar(p, i, fixed);</strong></span>
                }
            }
        }
        else if (which == "ratevar") {
            if (vector_of_values.size() &gt; 1)
                throw XStrom(boost::format("expecting 1 value for ratevar, found %d values") % vector_of_values.size());
            ASRV::ratevar_ptr_t rv = std::make_shared&lt;double&gt;(vector_of_values[0]);
            if (vector_of_subset_names[0] == "default") {
                default_found = true;
                for (unsigned i = 0; i &lt; num_subsets_defined; i++)
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetRateVar(rv, i, fixed);</strong></span>
            }
            else {
                for (auto s : vector_of_subset_names) {
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetRateVar(rv, _partition-&gt;findSubsetByName(s), fixed);</strong></span>
                }
            }
        }
        else if (which == "ncateg") {
            if (vector_of_values.size() &gt; 1)
                throw XStrom(boost::format("expecting 1 value for ncateg, found %d values") % vector_of_values.size());
            unsigned ncat = vector_of_values[0];
            if (vector_of_subset_names[0] == "default") {
                default_found = true;
                for (unsigned i = 0; i &lt; num_subsets_defined; i++)
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetNumCateg(ncat, i);</strong></span>
            }
            else {
                for (auto s : vector_of_subset_names) {
                    <span style="color:#0000ff"><strong>m-&gt;setSubsetNumCateg(ncat, _partition-&gt;findSubsetByName(s));</strong></span>
                }
            }
        }
        else if (which == "tree") {
            if (vector_of_values.size() &gt; 1)
                throw XStrom(boost::format("expecting 1 value for tree, found %d values") % vector_of_values.size());
            unsigned tree_index = vector_of_values[0];
            assert(tree_index &gt; 0);
            <span style="color:#0000ff"><strong>m-&gt;setTreeIndex(tree_index - 1, fixed);</strong></span>
            if (vector_of_subset_names[0] != "default")
                throw XStrom("tree must be assigned to default only");
        }
        else {
            assert(which == "relrate");
            if (vector_of_subset_names[0] != "default")
                throw XStrom("relrate must be assigned to default only");
            <span style="color:#0000ff"><strong>m-&gt;setSubsetRelRates(vector_of_values, fixed);</strong></span>
        }

        return default_found;
    }   

Replace the run member function

Replace the entire run function body with the version below. A lot has changed in this function, so it makes more sense to just replace it rather than show individual modifications in highlighted text. The biggest change is that the creation of model and likelihood objects has been moved into the initChains function (because a Model object and its Likelihood wrapper must be created for each Chain separately). I’ve also created several small functions to handle showing various kinds of output so that run becomes more of an outline than a narrative.

    inline void Strom::run() {  
        std::cout &lt;&lt; "Starting..." &lt;&lt; std::endl;
        std::cout &lt;&lt; "Current working directory: " &lt;&lt; boost::filesystem::current_path() &lt;&lt; std::endl;

        try {
            readData();
            readTrees();
            showPartitionInfo();

            // Create a Lot object that generates (pseudo)random numbers
            _lot = Lot::SharedPtr(new Lot);
            _lot-&gt;setSeed(_random_seed);

            // Create  Chain objects
            initChains();
            
            showBeagleInfo();
            showMCMCInfo();

            // Create an output manager and open output files
            _output_manager.reset(new OutputManager);
            _output_manager-&gt;outputConsole(boost::str(boost::format("\n%12s %12s %12s %12s") % "iteration" % "logLike" % "logPrior" % "TL"));
            _output_manager-&gt;openTreeFile("trees.tre", _data);
            _output_manager-&gt;openParameterFile("params.txt", _chains[0].getModel());
            sample(0, _chains[0]);
                        // Burn-in the chains
            startTuningChains();
            for (unsigned iteration = 1; iteration &lt;= _num_burnin_iter; ++iteration) {
                stepChains(iteration, false);
                swapChains();
            }
            stopTuningChains();

            // Sample the chains
            for (unsigned iteration = 1; iteration &lt;= _num_iter; ++iteration) {
                stepChains(iteration, true);
                swapChains();
            }
            showChainTuningInfo();
            stopChains();
            
            // Create swap summary
            swapSummary();

            // Close output files
            _output_manager-&gt;closeTreeFile();
            _output_manager-&gt;closeParameterFile();
        }
        catch (XStrom & x) {
            std::cerr &lt;&lt; "Strom encountered a problem:\n  " &lt;&lt; x.what() &lt;&lt; std::endl;
        }

        std::cout &lt;&lt; "\nFinished!" &lt;&lt; std::endl;
    }   

Add the readData member function

This function takes care of reading the data file whose name is stored in the data member _data_file_name (the value of this data member was set by the user via program options. The code in this function was previously in the body of the run function.

    inline void Strom::readData() {  
        std::cout &lt;&lt; "\n*** Reading and storing the data in the file " &lt;&lt; _data_file_name &lt;&lt; std::endl;
        _data = Data::SharedPtr(new Data());
        _data-&gt;setPartition(_partition);
        _data-&gt;getDataFromFile(_data_file_name);
    }   

Add the readTrees member function

This function takes care of reading the tree file whose name is stored in the data member _tree_file_name (the value of this data member was set by the user via program options). This function also checks to ensure that the tree has the same number of taxa as the data file and throws an exception the number of taxa differ. The code in this function was previously in the body of the run function.

    inline void Strom::readTrees() {  
        assert(_data);
        assert(_likelihoods.size() &gt; 0 && _likelihoods[0]);
        auto m = _likelihoods[0]-&gt;getModel();
        unsigned tree_index = m-&gt;getTreeIndex();
        std::cout &lt;&lt; "\n*** Reading and storing tree number " &lt;&lt; (tree_index + 1) &lt;&lt; " in the file " &lt;&lt; _tree_file_name &lt;&lt; std::endl;
        _tree_summary = TreeSummary::SharedPtr(new TreeSummary());
        _tree_summary-&gt;readTreefile(_tree_file_name, 0);

        Tree::SharedPtr tree = _tree_summary-&gt;getTree(tree_index);
        if (tree-&gt;numLeaves() != _data-&gt;getNumTaxa())
            throw XStrom(boost::format("Number of taxa in tree (%d) does not equal the number of taxa in the data matrix (%d)") % tree-&gt;numLeaves() % _data-&gt;getNumTaxa());
    }   

Add the showPartitionInfo member function

This function summarizes the data partition information. The code in this function was previously in the body of the run function.

    inline void Strom::showPartitionInfo() {  
        // Report information about data partition subsets
        unsigned nsubsets = _data-&gt;getNumSubsets();
        std::cout &lt;&lt; "\nNumber of taxa: " &lt;&lt; _data-&gt;getNumTaxa() &lt;&lt; std::endl;
        std::cout &lt;&lt; "Number of partition subsets: " &lt;&lt; nsubsets &lt;&lt; std::endl;
        for (unsigned subset = 0; subset &lt; nsubsets; subset++) {
            DataType dt = _partition-&gt;getDataTypeForSubset(subset);
            std::cout &lt;&lt; "  Subset " &lt;&lt; (subset+1) &lt;&lt; " (" &lt;&lt; _data-&gt;getSubsetName(subset) &lt;&lt; ")" &lt;&lt; std::endl;
            std::cout &lt;&lt; "    data type: " &lt;&lt; dt.getDataTypeAsString() &lt;&lt; std::endl;
            std::cout &lt;&lt; "    sites:     " &lt;&lt; _data-&gt;calcSeqLenInSubset(subset) &lt;&lt; std::endl;
            std::cout &lt;&lt; "    patterns:  " &lt;&lt; _data-&gt;getNumPatternsInSubset(subset) &lt;&lt; std::endl;
            std::cout &lt;&lt; "    ambiguity: " &lt;&lt; (_ambig_missing || dt.isCodon() ? "treated as missing data (faster)" : "handled appropriately (slower)") &lt;&lt; std::endl;
        }
    }  

Add the showBeagleInfo member function

This function shows what BeagleLib resources are available. The code in this function was previously in the body of the run function.

    inline void Strom::showBeagleInfo() {  
        assert(_likelihoods.size() &gt; 0 && _likelihoods[0]);
        std::cout &lt;&lt; "\n*** BeagleLib " &lt;&lt; _likelihoods[0]-&gt;beagleLibVersion() &lt;&lt; " resources:\n";
        std::cout &lt;&lt; "Preferred resource: " &lt;&lt; (_use_gpu ? "GPU" : "CPU") &lt;&lt; std::endl;
        std::cout &lt;&lt; "Available resources:" &lt;&lt; std::endl;
        std::cout &lt;&lt; _likelihoods[0]-&gt;availableResources() &lt;&lt; std::endl;
        std::cout &lt;&lt; "Resources used:" &lt;&lt; std::endl;
        std::cout &lt;&lt; _likelihoods[0]-&gt;usedResources() &lt;&lt; std::endl;
    }  

Add the calcHeatingPowers member function

This is a new function called within another new function, initChains (described below). Its job is to compute the heating power of each chain using the chain index and the heatfactor (data member _heating_lambda) specified by the user on the command line. The chain with index 0 is the cold chain and has power 1.0 (i.e. it explores the posterior distribution), while chains with indices greater than 0 explore distributions that are more gentle (valleys higher, peaks lower) due to the fact that the posterior density is raised to a power between 0.0 and 1.0 (determined by this function).

    inline void Strom::calcHeatingPowers() {    
        // Specify chain heating power (e.g. _heating_lambda = 0.2)
        // chain_index  power
        //      0       1.000 = 1/(1 + 0.2*0)
        //      1       0.833 = 1/(1 + 0.2*1)
        //      2       0.714 = 1/(1 + 0.2*2)
        //      3       0.625 = 1/(1 + 0.2*3)
        unsigned i = 0;
        for (auto & h : _heating_powers) {
            h = 1.0/(1.0 + _heating_lambda*i++);
        }
    }   

Add the initChains member function

This new function equips each chain with an index, starting tree, random number generator, model, likelihood calculator, and heating power.

    inline void Strom::initChains() {   
        // Create _num_chains chains
        _chains.resize(_num_chains);
        
        // Create _num_chains by _num_chains swap matrix
        _swaps.assign(_num_chains*_num_chains, 0);

        // Create heating power vector
        _heating_powers.assign(_num_chains, 1.0);
        calcHeatingPowers();
        
        // Initialize chains
        for (unsigned chain_index = 0; chain_index &lt; _num_chains; ++chain_index) {
            auto & c        = _chains[chain_index];
            auto likelihood = _likelihoods[chain_index];
            auto m          = likelihood-&gt;getModel();
            
            // Finish setting up models
            m-&gt;setSubsetNumPatterns(_data-&gt;calcNumPatternsVect());
            m-&gt;setSubsetSizes(_partition-&gt;calcSubsetSizes());
            m-&gt;activate();
            if (chain_index == 0)
                std::cout &lt;&lt; "\n" &lt;&lt; m-&gt;describeModel() &lt;&lt; std::endl;
            else
                m-&gt;describeModel();
                
            // Finish setting up likelihoods
            likelihood-&gt;setData(_data);
            likelihood-&gt;useUnderflowScaling(_use_underflow_scaling);
            likelihood-&gt;initBeagleLib();
            likelihood-&gt;useStoredData(_using_stored_data);
            
            // Build list of updaters, one for each free parameter in the model
            unsigned num_free_parameters = c.createUpdaters(m, _lot, likelihood);
            if (num_free_parameters == 0)
                throw XStrom("MCMC skipped because there are no free parameters in the model");

            // Tell the chain that it should adapt its updators (at least initially)
            c.startTuning();

            // Set heating power to precalculated value
            c.setChainIndex(chain_index);
            c.setHeatingPower(_heating_powers[chain_index]);
            
            // Give the chain a starting tree
            std::string newick = _tree_summary-&gt;getNewick(m-&gt;getTreeIndex());
            c.setTreeFromNewick(newick);

            // Print headers in output files and make sure each updator has its starting value
            c.start();
        }
    }   

Add the showMCMCInfo member function

This function spits out information about the MCMC analysis. It is called just before the burnin iteration loop begins. If data is being ignored, it says “Exploring prior”; otherwise, it computes and displays the starting likelihood, arbitrarily using the first Likelihood object in the _likelihoods vector (all Likelihood objects are identical at this point except for the BeagleLib instance assigned to them).

    inline void Strom::showMCMCInfo() {  
        assert(_likelihoods.size() &gt; 0 && _likelihoods[0]);
        std::cout &lt;&lt; "\n*** MCMC analysis beginning..." &lt;&lt; std::endl;
        if (_likelihoods[0]-&gt;usingStoredData()) {
            unsigned tree_index = _likelihoods[0]-&gt;getModel()-&gt;getTreeIndex();
            Tree::SharedPtr tree = _tree_summary-&gt;getTree(tree_index);
            double lnL = _chains[0].getLogLikelihood();
            std::cout &lt;&lt; boost::str(boost::format("Starting log likelihood = %.5f") % lnL) &lt;&lt; std::endl;
        }
        else
            std::cout &lt;&lt; "Exploring prior" &lt;&lt; std::endl;
    
        if (_expected_log_likelihood != 0.0)
            std::cout &lt;&lt; boost::str(boost::format("      (expecting %.3f)") % _expected_log_likelihood) &lt;&lt; std::endl;
    
        std::cout &lt;&lt; "Number of chains is " &lt;&lt; _num_chains &lt;&lt; std::endl;
        std::cout &lt;&lt; "Burning in for " &lt;&lt; _num_burnin_iter &lt;&lt; " iterations." &lt;&lt; std::endl;
        std::cout &lt;&lt; "Running after burn-in for " &lt;&lt; _num_iter &lt;&lt; " iterations." &lt;&lt; std::endl;
        std::cout &lt;&lt; "Sampling every " &lt;&lt; _sample_freq &lt;&lt; " iterations." &lt;&lt; std::endl;
        std::cout &lt;&lt; "Sample size is " &lt;&lt; (int)(_num_iter/_sample_freq) &lt;&lt; std::endl;
                
    }  

Add the startTuningChains and stopTuningChains member functions

These functions tell each chain to start and stop autotuning, respectively. The startTuningChains function should be called prior to burn-in, and the stopTuningChains should be called after burn-in so that tuning parameters are not modified during the sampling run.

    inline void Strom::startTuningChains() { 
        _swaps.assign(_num_chains*_num_chains, 0);
        for (auto & c : _chains) {
            c.startTuning();
        }
    }
    
    inline void Strom::stopTuningChains() {
        _swaps.assign(_num_chains*_num_chains, 0);
        for (auto & c : _chains) {
            c.stopTuning();
        }
    }   

Add the stepChains member function

This function loops through chains, telling each one to take one step. Each step involves calling the update method of each updater. The sample function is also called if sampling is true. Note that just because sample is called does not necessarily mean that a line is added to the tree and parameter file; that depends on the iteration and degree of thinning specified by the user. Also, sample does nothing if the chain c passed to it does not currently represent the cold chain. Remember that chain 0 started out as the cold chain but cannot any longer be assumed to be the cold chain because the cold chain may have swapped with one of the heated chains in the interim.

    inline void Strom::stepChains(unsigned iteration, bool sampling) {  
        for (auto & c : _chains) {
             c.nextStep(iteration);
            if (sampling)
                sample(iteration, c);
        }
    }   

Add the swapChains member function

The swapChains function selects two chains at random and asks them to attempt a swap. The swap is successful if a uniform random deviate is less than or equal to the coupled acceptance probability. Although it is not quite this simple, essentially both chains have to be able to move to where the other chain is located in order for the proposed swap to be accepted. If a swap is accepted, then the chain objects swap indices, tuning parameters, and heating power. It is much simpler to swap these three simple quantities than it would be to swap the entire state (tree topology, all edge lengths, and the current value of all substitution model parameters). The tuning parameters need to be swapped because the chains are exploring different landscapes, and a chain tuned for one landscape would not mix well if it suddenly found itself using a tuning parameter adjusted for a different landscape.

    inline void Strom::swapChains() {   
        if (_num_chains == 1)
            return;

        // Select two chains at random to swap
        // If _num_chains = 3...
        //  i  j  = (i + 1 + randint(0,1)) % _num_chains
        // ---------------------------------------------
        //  0  1  = (0 + 1 +      0      ) %     3
        //     2  = (0 + 1 +      1      ) %     3
        // ---------------------------------------------
        //  1  2  = (1 + 1 +      0      ) %     3
        //     0  = (1 + 1 +      1      ) %     3
        // ---------------------------------------------
        //  2  0  = (2 + 1 +      0      ) %     3
        //     1  = (2 + 1 +      1      ) %     3
        // ---------------------------------------------
        unsigned i = (unsigned)_lot-&gt;randint(0, _num_chains-1);
        unsigned j = i + 1 + (unsigned)_lot-&gt;randint(0, _num_chains-2);
        j %= _num_chains;

        assert(i != j && i &gt;=0 && i &lt; _num_chains && j &gt;= 0 && j &lt; _num_chains);

        // Determine upper and lower triangle cells in _swaps vector
        unsigned smaller = _num_chains;
        unsigned larger  = _num_chains;
        double index_i   = _chains[i].getChainIndex();
        double index_j   = _chains[j].getChainIndex();
        if (index_i &lt; index_j) {
            smaller = index_i;
            larger  = index_j;
        }
        else {
            smaller = index_j;
            larger  = index_i;
        }
        unsigned upper = smaller*_num_chains + larger;
        unsigned lower = larger*_num_chains  + smaller;
        _swaps[upper]++;

        // Propose swap of chains i and j
        // Proposed state swap will be successful if a uniform random deviate is less than R, where
        //    R = Ri * Rj = (Pi(j) / Pi(i)) * (Pj(i) / Pj(j))
        // Chain i: power = a, kernel = pi
        // Chain j: power = b, kernel = pj
        //      pj^a         pi^b
        // Ri = ----    Rj = ----
        //      pi^a         pj^b
        // log R = (a-b) [log(pj) - log(pi)]

        double heat_i       = _chains[i].getHeatingPower();
        double log_kernel_i = _chains[i].calcLogLikelihood() + _chains[i].calcLogJointPrior();

        double heat_j       = _chains[j].getHeatingPower();
        double log_kernel_j = _chains[j].calcLogLikelihood() + _chains[j].calcLogJointPrior();

        double logR = (heat_i - heat_j)*(log_kernel_j - log_kernel_i);

        double logu = _lot-&gt;logUniform();
        if (logu &lt; logR) {
            // accept swap
            _swaps[lower]++;
            _chains[j].setHeatingPower(heat_i);
            _chains[i].setHeatingPower(heat_j);
            _chains[j].setChainIndex(index_i);
            _chains[i].setChainIndex(index_j);
            std::vector&lt;double&gt; lambdas_i = _chains[i].getLambdas();
            std::vector&lt;double&gt; lambdas_j = _chains[j].getLambdas();
            _chains[i].setLambdas(lambdas_j);
            _chains[j].setLambdas(lambdas_i);
        }
    }   

Add the showChainTuningInfo member function

This function displays the tuning parameter values for each defined updater. Because autotuning is performed during burn-in, these will differ from the starting values defined in Chain::createUpdaters after burn-in is completed. This function also dispays the acceptance percentage for each updater, so it makes sense to call this function after the sampling run is finished in order to see the acceptance rates that were achieved during sampling.

    inline void Strom::showChainTuningInfo() const {    
        for (unsigned idx = 0; idx &lt; _num_chains; ++idx) {
            for (auto & c : _chains) {
                if (c.getChainIndex() == idx) {
                    _output_manager-&gt;outputConsole(boost::str(boost::format("\nChain %d (power %.5f)") % idx % c.getHeatingPower()));
                    std::vector&lt;std::string&gt; names = c.getUpdaterNames();
                    std::vector&lt;double&gt; lambdas    = c.getLambdas();
                    std::vector&lt;double&gt; accepts    = c.getAcceptPercentages();
                    std::vector&lt;unsigned&gt; nupdates = c.getNumUpdates();
                    unsigned n = (unsigned)names.size();
                    _output_manager-&gt;outputConsole(boost::str(boost::format("%35s %15s %15s %15s") % "Updater" % "Tuning Param." % "Accept %" % "No. Updates"));
                    for (unsigned i = 0; i &lt; n; ++i) {
                        _output_manager-&gt;outputConsole(boost::str(boost::format("%35s %15.3f %15.1f %15d") % names[i] % lambdas[i] % accepts[i] % nupdates[i]));
                    }
                }
            }
        }
    }   

Add the stopChains member function

This function loops through all chains and calls the stop function for each.

    inline void Strom::stopChains() {   
        for (auto & c : _chains)
            c.stop();
    }   

Add the swapSummary member function

Each time two chains attempt a swap, the event is recorded in the _swaps data member, which is a vector that is used to represent a 2-dimensional matrix in which the upper triangle is used to store the number of attempted swaps between each pair of chains and the bottom triangle is used to store the number of successful swaps between each pair. This function simply displays the _swaps vector in the form of a table.

    inline void Strom::swapSummary() const {    
        if (_num_chains &gt; 1) {
            unsigned i, j;
            std::cout &lt;&lt; "\nSwap summary (upper triangle = no. attempted swaps; lower triangle = no. successful swaps):" &lt;&lt; std::endl;

            // column headers
            std::cout &lt;&lt; boost::str(boost::format("%12s") % " ");
            for (i = 0; i &lt; _num_chains; ++i)
                std::cout &lt;&lt; boost::str(boost::format(" %12d") % i);
            std::cout &lt;&lt; std::endl;

            // top line
            std::cout &lt;&lt; boost::str(boost::format("%12s") % "------------");
            for (i = 0; i &lt; _num_chains; ++i)
                std::cout &lt;&lt; boost::str(boost::format("-%12s") % "------------");
            std::cout &lt;&lt; std::endl;

            // table proper
            for (i = 0; i &lt; _num_chains; ++i) {
                std::cout &lt;&lt; boost::str(boost::format("%12d") % i);
                for (j = 0; j &lt; _num_chains; ++j) {
                    if (i == j)
                        std::cout &lt;&lt; boost::str(boost::format(" %12s") % "---");
                    else
                        std::cout &lt;&lt; boost::str(boost::format(" %12.5f") % _swaps[i*_num_chains + j]);
                }
                std::cout &lt;&lt; std::endl;
            }

            // bottom line
            std::cout &lt;&lt; boost::str(boost::format("%12s") % "------------");
            for (i = 0; i &lt; _num_chains; ++i)
                std::cout &lt;&lt; boost::str(boost::format("-%12s") % "------------");
            std::cout &lt;&lt; std::endl;
        }
    }   

< 18.0 | 18.1 | 18.2 >