11.2 The Model class

(Linux version)

< 11.1 | 11.2 | 11.3 >

By way of a data member (_model) of the Likelihood class, the Model class will supply BeagleLib with the eigenvectors, eigenvalues, and relative rates it needs in order to compute the likelihood of a tree. The Eigen library is used to compute the eigenvectors and eigenvalues for a given combination of exchangeabilities and nucleotide (or codon) frequencies.

The model class will manage BeagleLib instances: if subsets differ in the number of states or number of rate categories, different BeagleLib instances must be created for each combination. The Model class will thus be our BeagleLib manager.

Begin by creating a new header file named model.hpp to contain the Model class declaration and member function bodies:

#pragma once    

#include &lt;algorithm&gt;
#include &lt;vector&gt;
#include "datatype.hpp"
#include "qmatrix.hpp"
#include "asrv.hpp"
#include "libhmsbeagle/beagle.h"
#include &lt;boost/math/distributions/gamma.hpp&gt;
#include &lt;Eigen/Dense&gt;

namespace strom {
    
    class Likelihood;

    class Model {
        
        friend class Likelihood;

        public:
            typedef std::vector&lt;ASRV::SharedPtr&gt;      asrv_vect_t;
            typedef std::vector&lt;QMatrix::SharedPtr&gt;   qmat_vect_t;
            typedef std::vector&lt;unsigned&gt;             subset_sizes_t;
            typedef std::vector&lt;DataType&gt;             subset_datatype_t;
            typedef std::vector&lt;double&gt;               subset_relrate_vect_t;
            typedef boost::shared_ptr&lt;Model&gt;          SharedPtr;
        
                                        Model();
                                        ~Model();

            void                        activate();
            void                        inactivate();
            
            std::string                 describeModel();

            void                        setSubsetDataTypes(const subset_datatype_t & datatype_vect);

            void                        setSubsetRateVar(ASRV::ratevar_ptr_t ratevar, unsigned subset, bool fixed);
            void                        setSubsetPinvar(ASRV::pinvar_ptr_t pinvar, unsigned subset, bool fixed);
            void                        setSubsetExchangeabilities(QMatrix::freq_xchg_ptr_t exchangeabilities, unsigned subset, bool fixed);
            void                        setSubsetStateFreqs(QMatrix::freq_xchg_ptr_t state_frequencies, unsigned subset, bool fixed);
            void                        setSubsetOmega(QMatrix::omega_ptr_t omega, unsigned subset, bool fixed);

            void                        setSubsetRelRates(subset_relrate_vect_t & relrates, bool fixed);
            subset_relrate_vect_t &     getSubsetRelRates();
            bool                        isFixedSubsetRelRates() const;
            double                      calcNormalizingConstantForSubsetRelRates() const;

            void                        setTreeIndex(unsigned i, bool fixed);
            unsigned                    getTreeIndex() const;
            bool                        isFixedTree() const;
        
            unsigned                    getNumSubsets() const;
            unsigned                    getNumSites() const;
            unsigned                    getSubsetNumSites(unsigned subset) const;
            const QMatrix &             getQMatrix(unsigned subset) const;
            const ASRV &                getASRV(unsigned subset) const;

            void                        setSubsetIsInvarModel(bool is_invar, unsigned subset);
            bool                        getSubsetIsInvarModel(unsigned subset) const;

            void                        setSubsetSizes(const subset_sizes_t nsites_vect);
            subset_sizes_t &            getSubsetSizes();

            void                        setSubsetNumPatterns(const subset_sizes_t npatterns_vect);
            unsigned                    getSubsetNumPatterns(unsigned subset) const;

            void                        setSubsetNumCateg(unsigned ncateg, unsigned subset);
            unsigned                    getSubsetNumCateg(unsigned subset) const;

            int                         setBeagleEigenDecomposition(int beagle_instance, unsigned subset, unsigned instance_subset);
            int                         setBeagleStateFrequencies(int beagle_instance, unsigned subset, unsigned instance_subset);
            int                         setBeagleAmongSiteRateVariationRates(int beagle_instance, unsigned subset, unsigned instance_subset);
            int                         setBeagleAmongSiteRateVariationProbs(int beagle_instance, unsigned subset, unsigned instance_subset);

        private:
        
            void                        clear();
        
            unsigned                    _num_subsets;
            unsigned                    _num_sites;
            subset_sizes_t              _subset_sizes;
            subset_sizes_t              _subset_npatterns;
            subset_datatype_t           _subset_datatypes;
            qmat_vect_t                 _qmatrix;
            asrv_vect_t                 _asrv;
        
            bool                        _tree_index;
            bool                        _tree_fixed;
        
            bool                        _subset_relrates_fixed;
            subset_relrate_vect_t       _subset_relrates;
        };
    
    // member function bodies go here
    
}   

Constructor, destructor, and clear functions

The constructor and destructor are very simple, and are similar to most of the other constructors and destructors you have created thus far.

    inline Model::Model() { 
        //std::cout &lt;&lt; "Constructing a Model" &lt;&lt; std::endl;
        clear();
    } 

    inline Model::~Model() {
        //std::cout &lt;&lt; "Destroying a Model" &lt;&lt; std::endl;
    }

    inline void Model::clear() {
        _num_subsets = 0;
        _num_sites = 0;
        _tree_index = 0;
        _tree_fixed = false;
        _subset_relrates_fixed = false;
        _subset_relrates.clear();
        _subset_sizes.clear();
        _subset_npatterns.clear();
        _subset_datatypes.clear();
        _qmatrix.clear();
        _asrv.clear();
    }   

The describeModel member function

This function reveals the current state of the model. It constructs and returns a string, which can be output to either std::cout or to a file.

This function will probably appear overly long and complicated to you! It could be made much shorted if it simply regurgitated the model specification provided by the user, but this function does a lot of sanity checking, reporting features of the model as it actually exists in memory. If this description differs from the model specified in the strom.conf file, then it is a clear sign that there is a bug. Later this function will be used to enumerate free model parameters that need updating during an MCMC analysis, so it could be argued that the name describeModel falls short of fully describing its significance.

Some explanation of how this function works follows the code.

    inline std::string Model::describeModel() { 
        // Creates summary such as following and returns as a string:
        //
        // Partition information:
        //
        //          data subset           1           2           3
        //    -----------------------------------------------------
        //           num. sites          20          20          20
        //        num. patterns           7           5          17
        //          num. states           4           4           4
        //      rate categories           4           1           4
        //
        // Parameter linkage:
        //
        //          data subset           1           2           3
        //    -----------------------------------------------------
        //          state freqs           1           1           1
        //    exchangeabilities           1           1           2
        //        rate variance           1           2           3
        //               pinvar           1           2           -
        
        // Sets used to determine which parameters are linked across subsets
        std::set&lt;double *&gt; freqset;
        std::set&lt;double *&gt; xchgset;
        std::set&lt;double *&gt; omegaset;
        std::set&lt;double *&gt; ratevarset;
        std::set&lt;double *&gt; pinvarset;
        std::set&lt;double *&gt; relrateset;

        // Vectors of pointers to distinct parameters
        std::vector&lt;double *&gt; unique_freq;
        std::vector&lt;double *&gt; unique_xchg;
        std::vector&lt;double *&gt; unique_omega;
        std::vector&lt;double *&gt; unique_ratevar;
        std::vector&lt;double *&gt; unique_pinvar;
        std::vector&lt;double *&gt; unique_relrate;

        // Map for storing strings that will contain the information for each row
        std::map&lt;std::string, std::string&gt; ss = {
            {"subset",    ""},
            {"dashes",    ""},
            {"freqs",     ""},
            {"xchg",      ""},
            {"omega",     ""},
            {"ratevar",   ""},
            {"pinvar",    ""},
            {"ncateg",    ""},
            {"nsites",    ""},
            {"npatterns", ""},
            {"nstates",   ""}
        };
        
        // Ensure that the subset relative rates are fixed if there is only one
        // subset; otherwise the subset relative rates will be added to the list
        // of free parameters that are updated, which makes no sense in this case
        if (_num_subsets == 1)
            _subset_relrates_fixed = true;

        // Loop through subsets, building up rows as we go
        for (unsigned i = 0; i &lt; _num_subsets; i++) {
            // Ensure that for subsets in which the number of rate categories is 1 that
            // the gamma rate variance is fixed; otherwise the gamma rate variance will
            // be added to the list of free parameters that are updated, which makes
            // no sense in this case
            if (_asrv[i]-&gt;getNumCateg() == 1) {
                _asrv[i]-&gt;fixRateVar(true);
            }
        
            unsigned index;
            ss["subset"] += boost::str(boost::format("%12d") % (i+1));
            ss["dashes"] += "------------";

            // Determine whether state freqs are unique for this subset
            QMatrix::freq_xchg_ptr_t pfreq = _qmatrix[i]-&gt;getStateFreqsSharedPtr();
            QMatrix::freq_xchg_t & freq = *pfreq;
            double * freq_addr = &freq[0];
            auto f = freqset.insert(freq_addr);
            if (f.second) {
                unique_freq.push_back(freq_addr);
                index = (unsigned)unique_freq.size();
            }
            else {
                auto iter = std::find(unique_freq.begin(), unique_freq.end(), freq_addr);
                index = (unsigned)std::distance(unique_freq.begin(), iter) + 1;
            }
            ss["freqs"] += boost::str(boost::format("%12d") % index);

            // Determine whether exchangeabilities are unique for this subset   
            if (_subset_datatypes[i].isNucleotide()) {
                QMatrix::freq_xchg_ptr_t pxchg = _qmatrix[i]-&gt;getExchangeabilitiesSharedPtr();
                QMatrix::freq_xchg_t & xchg = *pxchg;
                double * xchg_addr = &xchg[0];
                auto x = xchgset.insert(xchg_addr);
                if (x.second) {
                    unique_xchg.push_back(xchg_addr);
                    index = (unsigned)unique_xchg.size();
                }
                else {
                    auto iter = std::find(unique_xchg.begin(), unique_xchg.end(), xchg_addr);
                    index = (unsigned)std::distance(unique_xchg.begin(), iter) + 1;
                }
                ss["xchg"] += boost::str(boost::format("%12d") % index);
            }
            else {
                ss["xchg"] += boost::str(boost::format("%12s") % "-");
            }   
            
            // Determine whether omega is unique for this subset
            if (_subset_datatypes[i].isCodon()) {
                QMatrix::omega_ptr_t pomega = _qmatrix[i]-&gt;getOmegaSharedPtr();
                QMatrix::omega_t omegavalue = *pomega;
                double * omega_addr = &omegavalue;
                auto o = omegaset.insert(omega_addr);
                if (o.second) {
                    unique_omega.push_back(omega_addr);
                    index = (unsigned)unique_omega.size();
                }
                else {
                    auto iter = std::find(unique_omega.begin(), unique_omega.end(), omega_addr);
                    index = (unsigned)std::distance(unique_omega.begin(), iter) + 1;
                }
                ss["omega"] += boost::str(boost::format("%12d") % index);
            }
            else {
                ss["omega"] += boost::str(boost::format("%12s") % "-");
            }

            // Determine whether rate variance is unique for this subset
            ASRV::ratevar_ptr_t pratevar = _asrv[i]-&gt;getRateVarSharedPtr();
            double & ratevar = *pratevar;
            double * ratevar_addr = &ratevar;
            auto r = ratevarset.insert(ratevar_addr);
            if (r.second) {
                unique_ratevar.push_back(ratevar_addr);
                index = (unsigned)unique_ratevar.size();
            }
            else {
                auto iter = std::find(unique_ratevar.begin(), unique_ratevar.end(), ratevar_addr);
                index = (unsigned)std::distance(unique_ratevar.begin(), iter) + 1;
            }
            ss["ratevar"] += boost::str(boost::format("%12d") % index);
            
            // Determine whether pinvar is unique for this subset
            if (_asrv[i]-&gt;getIsInvarModel()) {
                ASRV::pinvar_ptr_t ppinvar = _asrv[i]-&gt;getPinvarSharedPtr();
                double & pinvar = *ppinvar;
                double * pinvar_addr = &pinvar;
                auto r = pinvarset.insert(pinvar_addr);
                if (r.second) {
                    unique_pinvar.push_back(pinvar_addr);
                    index = (unsigned)unique_pinvar.size();
                }
                else {
                    auto iter = std::find(unique_pinvar.begin(), unique_pinvar.end(), pinvar_addr);
                    index = (unsigned)std::distance(unique_pinvar.begin(), iter) + 1;
                }
                ss["pinvar"] += boost::str(boost::format("%12d") % index);
            }
            else {
                ss["pinvar"] += boost::str(boost::format("%12s") % "-");
            }
            
            // Save number of rate categories for this subset
            ss["ncateg"] += boost::str(boost::format("%12d") % _asrv[i]-&gt;getNumCateg());

            // Save number of sites for this subset
            ss["nsites"] += boost::str(boost::format("%12d") % _subset_sizes[i]);

            // Save number of patterns for this subset
            ss["npatterns"] += boost::str(boost::format("%12d") % _subset_npatterns[i]);

            // Save number of states for this subset
            if (_subset_datatypes.size() == _num_subsets)
                ss["nstates"] += boost::str(boost::format("%12d") % _subset_datatypes[i].getNumStates());
            else
                ss["nstates"] += boost::str(boost::format("%12s") % "?");

        }
        std::string s = "Partition information:\n\n";
        
        s += boost::str(boost::format("%20s%s\n") % "data subset" % ss["subset"]);
        s += boost::str(boost::format("%20s%s\n") % "-----------------" % ss["dashes"]);
        s += boost::str(boost::format("%20s%s\n") % "num. sites" % ss["nsites"]);
        s += boost::str(boost::format("%20s%s\n") % "num. patterns" % ss["npatterns"]);
        s += boost::str(boost::format("%20s%s\n") % "num. states" % ss["nstates"]);
        s += boost::str(boost::format("%20s%s\n") % "rate categories" % ss["ncateg"]);

        s += "\nParameter linkage:\n\n";
        
        s += boost::str(boost::format("%20s%s\n") % "data subset" % ss["subset"]);
        s += boost::str(boost::format("%20s%s\n") % "-----------------" % ss["dashes"]);
        s += boost::str(boost::format("%20s%s\n") % "state freqs" % ss["freqs"]);
        s += boost::str(boost::format("%20s%s\n") % "exchangeabilities" % ss["xchg"]);
        s += boost::str(boost::format("%20s%s\n") % "omega" % ss["omega"]);
        s += boost::str(boost::format("%20s%s\n") % "rate variance" % ss["ratevar"]);
        s += boost::str(boost::format("%20s%s\n") % "pinvar" % ss["pinvar"]);
        
        s += "\nParameter values for each subset:\n";

        s += "\n  relative rate:\n";
        for (unsigned i = 0; i &lt; _num_subsets; i++) {
            s += boost::str(boost::format("  %12d: %g\n") % (i+1) % _subset_relrates[i]);
        }
        
        s += "\n  state freqs:\n";
        for (unsigned i = 0; i &lt; _num_subsets; i++) {
            QMatrix::freq_xchg_t & freqs = *(_qmatrix[i]-&gt;getStateFreqsSharedPtr());
            std::vector&lt;std::string&gt; freqs_as_strings(freqs.size());
            std::transform(freqs.begin(), freqs.end(), freqs_as_strings.begin(), [](double freq) {return boost::str(boost::format("%g") % freq);});
            std::string tmp = boost::algorithm::join(freqs_as_strings, ",");
            s += boost::str(boost::format("  %12d: (%s)\n") % (i+1) % tmp);
        }

        s += "\n  exchangeabilities:\n";
        for (unsigned i = 0; i &lt; _num_subsets; i++) {
            if (_subset_datatypes[i].isNucleotide()) {
                QMatrix::freq_xchg_t & xchg = *(_qmatrix[i]-&gt;getExchangeabilitiesSharedPtr());
                std::vector&lt;std::string&gt; xchg_as_strings(xchg.size());
                std::transform(xchg.begin(), xchg.end(), xchg_as_strings.begin(), [](double x) {return boost::str(boost::format("%g") % x);});
                std::string tmp = boost::algorithm::join(xchg_as_strings, ",");
                s += boost::str(boost::format("  %12d: (%s)\n") % (i+1) % tmp);
            }
            else {
                s += boost::str(boost::format("  %12d: -\n") % (i+1));
            }
        }

        s += "\n  omega:\n";
        for (unsigned i = 0; i &lt; _num_subsets; i++) {
            if (_subset_datatypes[i].isCodon()) {
                double omega = *(_qmatrix[i]-&gt;getOmegaSharedPtr());
                s += boost::str(boost::format("  %12d: %g\n") % (i+1) % omega);
            }
            else {
                s += boost::str(boost::format("  %12d: -\n") % (i+1));
            }
        }

        s += "\n  rate variance:\n";
        for (unsigned i = 0; i &lt; _num_subsets; i++) {
            if (_asrv[i]-&gt;getNumCateg() &gt; 1) {
                double ratevar = *(_asrv[i]-&gt;getRateVarSharedPtr());
                s += boost::str(boost::format("  %12d: %g\n") % (i+1) % ratevar);
            }
            else
                s += boost::str(boost::format("  %12d: -\n") % (i+1));
        }

        s += "\n  pinvar:\n";
        for (unsigned i = 0; i &lt; _num_subsets; i++) {
            double pinvar = *(_asrv[i]-&gt;getPinvarSharedPtr());
            bool is_invar_model = _asrv[i]-&gt;getIsInvarModel();
            if (is_invar_model)
                s += boost::str(boost::format("  %12d: %g\n") % (i+1) % pinvar);
            else
                s += boost::str(boost::format("  %12d: -\n") % (i+1));
        }

        return s;
    } 

Each row of output is stored as one element of the map ss, which has keys that are descriptive of the information in that particular row.

The table labeled “Partition information” is fairly straightforward. The number of sites, patterns, number of states, and number of (discrete gamma) rate categories for each subset are appended to the appropriate element of ss using boost::format to ensure they are all right-justified.

The second table shows how many unique subset relative rates, state frequency vectors, exchangeability vectors, omegas, rate variances, and proportions of invariable sites are defined in the model. This is somewhat more tricky to determine, and rather than just use the information supplied by the user in the configuration file, this function determines actual linkage of parameters across subsets as a sanity check to make sure that the model is set up as the user intended.

To do this, 6 std::set variables are created (freqset, xchgset, omegaset, ratevarset, pinvarset, and relrateset). I’ll use exchangeabilities to illustrate, as they are one of the more interesting parameters defined in this example, being shared across 2 subsets and distinct for the 3rd. Here is the relevant code. This is part of a loop over subsets, where i is the current subset index:

            // Determine whether exchangeabilities are unique for this subset   
            if (_subset_datatypes[i].isNucleotide()) {
                QMatrix::freq_xchg_ptr_t pxchg = _qmatrix[i]-&gt;getExchangeabilitiesSharedPtr();
                QMatrix::freq_xchg_t & xchg = *pxchg;
                double * xchg_addr = &xchg[0];
                auto x = xchgset.insert(xchg_addr);
                if (x.second) {
                    unique_xchg.push_back(xchg_addr);
                    index = (unsigned)unique_xchg.size();
                }
                else {
                    auto iter = std::find(unique_xchg.begin(), unique_xchg.end(), xchg_addr);
                    index = (unsigned)std::distance(unique_xchg.begin(), iter) + 1;
                }
                ss["xchg"] += boost::str(boost::format("%12d") % index);
            }
            else {
                ss["xchg"] += boost::str(boost::format("%12s") % "-");
            }   

First, the reference xchg is assigned to the exchangeabilities for subset i. In the example model specification used for this step, the configuration file contains these lines pertaining to the assignment of exchangeabilities across subsets:

rmatrix = first,second:1,1,1,1,1,1  
rmatrix = third:1,2,1,1,2,1         

This specifies that the exact same exchangeabilities should be used for both subsets first and second, while a distinct set of exchangeabilities should be used for subset third. This means that the shared pointers to exchangeabilities stored in _qmatrix[0] and _qmatrix[1] should point to the same array in memory, and the shared pointer to the exchangeabilities stored in _qmatrix[2] should point to a different memory location. When visiting subset i, we find the address of the exchangeability array used for that subset (xchg_addr) and insert that address into the set xchgset. The return value of the std::set::insert function is a std::pair, the first element of which is an iterator positioned at the inserted element and the second of which is true if an insertion took place, false otherwise. If an insertion took place, it means that the memory address xchg_addr is distinct, and we thus push xchg_addr onto the end of the vector unique_xchg, which holds the distinct memory addresses of each unique exchangeability vector in the order in which it was discovered when iterating through subsets. If an insertion did not take place, it means that the memory address xchg_addr was not distinct. In this case, the index of that address in the unique_xchg vector is found. In either case, the index of the element in the unique_xchg vector (plus 1) serves as the value stored for subset i in the row corresponding to exchangeabilities.

Note that this approach will not attribute the same index to two different subsets if only the exchangeability values are identical; it is the memory address of the first element of the exchangeability vector that matters. It is possible for two subsets to have potentially different exchangeability vectors but not be distinct at the moment this function was called (for example, it is quite possible that exchangeability vectors for every subset are allowed to differ but were all initialized to 1,1,1,1,1,1 at the start of the program).

Don’t fret that you don’t know about the ASRV and QMatrix classes, those will be introduced in coming sections. For now just think of them as keepers of parameters and information related to Among-Site Rate Variation (ASRV) and the instantaneous rate matrix (QMatrix).

Accessors

These 8 functions (getSubsetNumPatterns, getSubsetNumSites, getNumSites, getNumSubsets, getSubsetNumCateg, getSubsetIsInvarModel, getQMatrix, and getASRV) simply provide a way to peek at the values of private data members.

    inline unsigned Model::getSubsetNumPatterns(unsigned subset) const { 
        assert(subset &lt; _num_subsets);
        return _subset_npatterns[subset];
    }
    
    inline unsigned Model::getSubsetNumSites(unsigned subset) const {
        assert(subset &lt; _num_subsets);
        return _subset_sizes[subset];
    } 
    
    inline unsigned Model::getNumSites() const {
        return _num_sites;
    }
    
    inline unsigned Model::getNumSubsets() const {
        return _num_subsets;
    }
    
    inline unsigned Model::getSubsetNumCateg(unsigned subset) const {
        assert(subset &lt; _num_subsets);
        assert(_asrv.size() == _num_subsets);
        assert(_asrv[subset]);
        return _asrv[subset]-&gt;getNumCateg();
    } 
    
    inline bool Model::getSubsetIsInvarModel(unsigned subset) const {
        assert(subset &lt; _num_subsets);
        assert(_asrv.size() == _num_subsets);
        assert(_asrv[subset]);
        return _asrv[subset]-&gt;getIsInvarModel();
    }
    
    inline const QMatrix & Model::getQMatrix(unsigned subset) const {
        assert(subset &lt; _num_subsets);
        return *(_qmatrix[subset]);
    } 
    
    inline const ASRV & Model::getASRV(unsigned subset) const {
        assert(subset &lt; _num_subsets);
        return *(_asrv[subset]);
    } 

The calcNormalizingConstantForSubsetRelRates member function

This function calculates and returns the sum of the products of subset proportions and subset relative rates. The subset relative rates must be normalized so that they don’t affect the mean rate, which is determined by the edge lengths of the tree. To compute the normalization constant, we compute the sum of the products of subset proportions and subset relative rates. For example, for 3 subsets,

normalizing_constant = p1*r1 + p2*r2 +p3*r3

Dividing each relative rate by this sum of products ensures that the sum of products, if computed again, would be equal to 1.0 (which is the goal).

    inline double Model::calcNormalizingConstantForSubsetRelRates() const { 
        // normalize _relrates so that expected relative rate across subsets equals 1.0
        double normalizing_constant = 0.0;
        for (unsigned s = 0; s &lt; _num_subsets; s++) {
            normalizing_constant += _subset_sizes[s]*_subset_relrates[s]/_num_sites;
        }
        return normalizing_constant;
    } 

The getSubsetSizes member function

This function returns a reference to the vector of subset sizes, _subset_sizes.

    inline Model::subset_sizes_t & Model::getSubsetSizes() { 
        return _subset_sizes;
    }   

The setSubsetSizes member function

This function uses std::copy to copy the subset sizes (i.e. the number of sites in each subset, supplied via the nsites_vect parameter) into the _subset_sizes data member. It then uses std::accumulate to sum all subset sizes and store the result in the data member _num_sites.

    inline void Model::setSubsetSizes(const subset_sizes_t nsites_vect) { 
        assert(nsites_vect.size() == _num_subsets);
        _subset_sizes.resize(_num_subsets);
        std::copy(nsites_vect.begin(), nsites_vect.end(), _subset_sizes.begin());
        _num_sites = std::accumulate(_subset_sizes.begin(), _subset_sizes.end(), 0);
    } 

The setSubsetNumPatterns member function

This function copies the supplied vector of pattern counts (npatterns_vect) to the data member _subset_npatterns using std::copy. The subset pattern counts are not needed by model; they are only supplied to the model to allow the model to print them out in the describeModel member function.

    inline void Model::setSubsetNumPatterns(const subset_sizes_t npatterns_vect) { 
        assert(npatterns_vect.size() == _num_subsets);
        _subset_npatterns.resize(_num_subsets);
        std::copy(npatterns_vect.begin(), npatterns_vect.end(), _subset_npatterns.begin());
    } 

The setSubsetDataTypes member function

This function should be called as soon as the data types associated with each partition subset are known. The function sets the _num_subsets data member to the number of partition subsets and resizes the _asrv, _qmatrix, and _subset_datatypes vectors to have length _num_subsets. The datatype_vect parameter is copied (using std::copy) to the _subset_datatypes vector and each of the _num_subsets elements of _asrv and _qmatrix are initialized with new ASRV or QMatrixNucleotide/QMatrixCodon elements, respectively.

    inline void Model::setSubsetDataTypes(const subset_datatype_t & datatype_vect) { 
        _num_subsets = (unsigned)datatype_vect.size();

        _qmatrix.clear();
        _qmatrix.resize(_num_subsets);

        _asrv.clear();
        _asrv.resize(_num_subsets);

        _subset_datatypes.resize(_num_subsets);
        std::copy(datatype_vect.begin(), datatype_vect.end(), _subset_datatypes.begin());

		_subset_relrates.assign(_num_subsets, 1.0);
        
        for (unsigned s = 0; s &lt; _num_subsets; s++) {
            _asrv[s].reset(new ASRV());
            if (_subset_datatypes[s].isNucleotide())
                _qmatrix[s].reset(new QMatrixNucleotide());
            else if (_subset_datatypes[s].isCodon()) {
                GeneticCode::SharedPtr gcptr = _subset_datatypes[s].getGeneticCode();
                _qmatrix[s].reset(new QMatrixCodon(gcptr));
                }
            else
                throw XStrom(boost::format("Only nucleotide or codon data allowed in this version, you specified data type \"%s\" for subset %d") % _subset_datatypes[s].getDataTypeAsString() % (s+1));
        }
    } 

The setSubsetNumCateg member function

This function sets the number of among-site rate categories. Everything about rate heterogeneity is handled by the ASRV data member, so, after performing a couple of sanity checks, the setNumCateg member function of the _asrv data member is called upon to store this value for the Model.

    inline void Model::setSubsetNumCateg(unsigned ncateg, unsigned subset) { 
        assert(subset &lt; _num_subsets);
        if (ncateg &lt; 1) {
            throw XStrom(boost::str(boost::format("number of categories used for among-site rate variation must be greater than zero but the value %d was supplied") % ncateg));
        }
        _asrv[subset]-&gt;setNumCateg(ncateg);
    } 

The setSubsetRateVar member function

This function sets the variance of among-site rates. Everything about among-site rate heterogeneity is handled by the ASRV data member, so, after performing a couple of sanity checks, the setRateVarSharedPtr member function of the _asrv data member is called upon to store this value for the Model. Note that ratevar is a shared_ptr rather than a double value, so it must be dereferenced using an asterisk in order to check whether the user has supplied a valid value. The fixed parameter determines whether this rate variance parameter will be updated during a subsequent analysis (e.g. MCMC).

    inline void Model::setSubsetRateVar(ASRV::ratevar_ptr_t ratevar, unsigned subset, bool fixed) { 
        assert(subset &lt; _num_subsets);
        assert(ratevar);
        if (*ratevar &lt; 0.0)
            throw XStrom(boost::str(boost::format("rate variance must be greater than or equal to zero but the value %.5f was supplied") % *ratevar));
        _asrv[subset]-&gt;setRateVarSharedPtr(ratevar);
        _asrv[subset]-&gt;fixRateVar(fixed);
    } 

The setSubsetPinvar member function

This function sets the proportion of invariable sites. Everything about among-site rate heterogeneity is handled by the ASRV data member, so, after performing sanity checks, the setPinvarSharedPtr member function of the _asrv data member is called upon to store this value for the Model. Note that pinvar is a shared_ptr rather than a double value, so it must be dereferenced using an asterisk in order to check whether the user has supplied a valid value. The fixed parameter determines whether this proportion of invariable site parameter will be updated during a subsequent analysis (e.g. MCMC).

    inline void Model::setSubsetPinvar(ASRV::pinvar_ptr_t pinvar, unsigned subset, bool fixed) { 
        assert(subset &lt; _num_subsets);
        assert(pinvar);
        if (*pinvar &lt; 0.0)
            throw XStrom(boost::str(boost::format("proportion of invariable sites must be greater than or equal to zero but the value %.5f was supplied") % *pinvar));
        if (*pinvar &gt;= 1.0)
            throw XStrom(boost::str(boost::format("proportion of invariable sites must be less than one but the value %.5f was supplied") % *pinvar));
        _asrv[subset]-&gt;setPinvarSharedPtr(pinvar);
        _asrv[subset]-&gt;fixPinvar(fixed);
    } 

The setSubsetIsInvarModel member function

The is_invar parameter of this function determines whether this model will be an invariable sites model for the partition subset indicated.

    inline void Model::setSubsetIsInvarModel(bool is_invar, unsigned subset) { 
        assert(subset &lt; _num_subsets);
        _asrv[subset]-&gt;setIsInvarModel(is_invar);
    } 

The setSubsetExchangeabilities member function

This function sets the exchangeabilities for a particular subset. The exchangeabilities are normalized to make sure they sum to 1 and sent to the QMatrix object pointed to by the data member _qmatrix, which is responsible for all data related to calculating the instantaneous rate matrix and its eigenvalues/eigenvectors.

    inline void Model::setSubsetExchangeabilities(QMatrix::freq_xchg_ptr_t exchangeabilities, unsigned subset, bool fixed) { 
        assert(subset &lt; _num_subsets);
        if (!_subset_datatypes[subset].isCodon()) {
            double first_xchg = (*exchangeabilities)[0];
            if (first_xchg == -1)
                _qmatrix[subset]-&gt;setEqualExchangeabilities(exchangeabilities);
            else
                _qmatrix[subset]-&gt;setExchangeabilitiesSharedPtr(exchangeabilities);
            _qmatrix[subset]-&gt;fixExchangeabilities(fixed);
        }
    } 

The setSubsetStateFreqs member function

This function sets the state frequencies for a particular subset. The frequencies are sent to the QMatrix object pointed to by the data member _qmatrix, which is responsible for all data related to calculating the instantaneous rate matrix and its eigenvalues/eigenvectors. It is possible that the user has specified the keyword equal for the state frequencies in the config file, in which case the first element of the parameter state_frequencies will be equal to -1. If that is the case, the QMatrix::setEqualStateFreqs member function is called instead of the QMatrix::setStateFreqsSharedPtr member function.

    inline void Model::setSubsetStateFreqs(QMatrix::freq_xchg_ptr_t state_frequencies, unsigned subset, bool fixed) { 
        assert(subset &lt; _num_subsets);
        double first_freq = (*state_frequencies)[0];
        if (first_freq == -1)
            _qmatrix[subset]-&gt;setEqualStateFreqs(state_frequencies);
        else
            _qmatrix[subset]-&gt;setStateFreqsSharedPtr(state_frequencies);
        _qmatrix[subset]-&gt;fixStateFreqs(fixed);
    } 

The setSubsetOmega member function

This function sets the omega parameter for a particular subset. The omega parameter represents the nonsynonymous/synonymous rate ratio used in codon models. The parameter omega is a shared pointer, hence the need for indirection using the asterisk in order to assert that its value is valid.

    inline void Model::setSubsetOmega(QMatrix::omega_ptr_t omega, unsigned subset, bool fixed) { 
        assert(subset &lt; _num_subsets);
        assert(*omega &gt; 0.0);
        if (_subset_datatypes[subset].isCodon()) {
            _qmatrix[subset]-&gt;setOmegaSharedPtr(omega);
            _qmatrix[subset]-&gt;fixOmega(fixed);
        }
    } 

The setTreeIndex, getTreeIndex, and isFixedTree member functions

The setTreeIndex function allows for telling the model which tree is to be used to compute the likelihood (or starting likelihood in the case of an MCMC analysis). The getTreeIndex can be used to determine which tree index is currently held by the model, and isFixedTree returns whether the tree is considered fixed (which only makes sense in the context of an MCMC analysis).

    inline void Model::setTreeIndex(unsigned i, bool fixed) {   
        _tree_index = i;
        _tree_fixed = fixed;
    }
    
    inline unsigned Model::getTreeIndex() const {
        return _tree_index;
    }
    
    inline bool Model::isFixedTree() const {
        return _tree_fixed;
    }  

The setSubsetRelRates member function

This function sets the elements of the _subset_relrates vector according to the values specified by the user. If the user specified “equal” then the first element of the relrates vector will equal -1. In this case, the relative rates for all subsets are set to the value 1.0. If the user specified that the subset relative rates should be fixed at the specified values, then the data member _subset_relrates_fixed will be set to true.

    inline void Model::setSubsetRelRates(subset_relrate_vect_t & relrates, bool fixed) {   
        assert(_num_subsets &gt; 0);
        assert(relrates.size() &gt; 0);
        if (relrates[0] == -1)
            _subset_relrates.assign(_num_subsets, 1.0);
        else
            _subset_relrates.assign(relrates.begin(), relrates.end());
        _subset_relrates_fixed = fixed;
    }   

The getSubsetRelRates member function

This accessor returns a reference to the _subset_relrates vector.

    inline Model::subset_relrate_vect_t & Model::getSubsetRelRates() {   
        return _subset_relrates;
    }   

The isFixedSubsetRelRates member function

This accessor returns the boolean value of the data member _subset_relrates_fixed.

    inline bool Model::isFixedSubsetRelRates() const {  
        return _subset_relrates_fixed;
    }   

The activate and inactivate member functions

This function is useful during model initiation: it is unnecessary to recalculate the eigensystem for the rate matrix when only part of the rate matrix has been specified. Hence the model should begin life in an inactivated state and the activate function can be called when it is set up and ready to go.

    inline void Model::activate() { 
        for (auto q : _qmatrix)
            q-&gt;setActive(true);
    }

    inline void Model::inactivate() {
        for (auto q : _qmatrix)
            q-&gt;setActive(false);
    } 

The setBeagleEigenDecomposition member function

This function provides eigenvalues, eigenvectors, and inverse eigenvectors to BeagleLib for use in computing transition probabilities.

    inline int Model::setBeagleEigenDecomposition(int beagle_instance, unsigned subset, unsigned instance_subset) { 
        assert(subset &lt; _qmatrix.size());
        const double * pevec = _qmatrix[subset]-&gt;getEigenvectors();
        const double * pivec = _qmatrix[subset]-&gt;getInverseEigenvectors();
        const double * pival = _qmatrix[subset]-&gt;getEigenvalues();
        int code = beagleSetEigenDecomposition(
            beagle_instance,    // Instance number (input)
            instance_subset,    // Index of eigen-decomposition buffer (input)
            pevec,              // Flattened matrix (stateCount x stateCount) of eigen-vectors (input)
            pivec,              // Flattened matrix (stateCount x stateCount) of inverse-eigen- vectors (input)
            pival);             // Vector of eigenvalues

        return code;
    } 

The setBeagleStateFrequencies member function

This function provides state frequencies to BeagleLib for use in computing transition probabilities and the log-likelihood.

    inline int Model::setBeagleStateFrequencies(int beagle_instance, unsigned subset, unsigned instance_subset) { 
        assert(subset &lt; _qmatrix.size());
        const double * pfreq = _qmatrix[subset]-&gt;getStateFreqs();
        int code = beagleSetStateFrequencies(
             beagle_instance,   // Instance number (input)
             instance_subset,   // Index of state frequencies buffer (input)
             pfreq);            // State frequencies array (stateCount) (input)

        return code;
    } 

The setBeagleAmongSiteRateVariationRates member function

This function provides the rates component of the among-site rate heterogeneity submodel to BeagleLib.

    inline int Model::setBeagleAmongSiteRateVariationRates(int beagle_instance, unsigned subset, unsigned instance_subset) { 
        assert(subset &lt; _asrv.size());
        const double * prates = _asrv[subset]-&gt;getRates();
        int code = beagleSetCategoryRatesWithIndex(
            beagle_instance,    // Instance number (input)
            instance_subset,    // Index of category rates buffer (input)
            prates);            // Array containing categoryCount rate scalers (input)

        return code;
    } 

The setBeagleAmongSiteRateVariationProbs member function

This function sends the vector of category weights to BeagleLib. The category weights are the probabilities of each category in the among-site rate heterogeneity model.

    inline int Model::setBeagleAmongSiteRateVariationProbs(int beagle_instance, unsigned subset, unsigned instance_subset) { 
        assert(subset &lt; _asrv.size());
        const double * pprobs = _asrv[subset]-&gt;getProbs();
        int code = beagleSetCategoryWeights(
            beagle_instance,    // Instance number (input)
            instance_subset,    // Index of category weights buffer (input)
            pprobs);            // Category weights array (categoryCount) (input)

        return code;
    } 

< 11.1 | 11.2 | 11.3 >