15.2 Adding Helper Functions to Data and Model

(Mac version)

< 15.1 | 15.2 | 15.3 >

The OutputManager class depends on (1) the Data::createTaxaBlock and Data::createTranslateStatement functions to create a taxa block and a translate statement, respectively, in its openTreeFile function; the Model::paramNamesAsString and Model::paramValuesAsString functions to obtain current model parameter names in its openParamFile function and values in its outputParameters function; and the TreeManip::makeNewick function to provide the newick tree description that it saves to the tree file in its outputTree function.

The last of these (TreeManip::makeNewick) already exists, and the following two sections add the other helper functions to the Data and Model classes.

Adding createTaxaBlock and createTranslateStatement to the Data class

Add the bold, blue lines to the Data class declaration in the file data.hpp.

    class Data {    
        public:
            typedef std::vector&lt;std::string&gt;            taxon_names_t;
            typedef unsigned long long                  state_t;
            typedef std::vector&lt;state_t&gt;                pattern_vect_t;
            typedef std::vector&lt;state_t&gt;                monomorphic_vect_t;
            typedef std::vector&lt;int&gt;                    partition_key_t;
            typedef std::map&lt;pattern_vect_t,unsigned&gt;   pattern_map_t;
            typedef std::vector&lt;pattern_vect_t&gt;         data_matrix_t;
            typedef std::vector&lt;pattern_map_t&gt;          pattern_map_vect_t;
            typedef std::vector&lt;double&gt;                 pattern_counts_t;
            typedef std::vector&lt;unsigned&gt;               subset_end_t;
            typedef std::vector&lt;unsigned&gt;               npatterns_vect_t;
            typedef std::pair&lt;unsigned, unsigned&gt;       begin_end_pair_t;
            typedef std::shared_ptr&lt;Data&gt;               SharedPtr;

                                                        Data();
                                                        ~Data();
        
            Partition::SharedPtr                        getPartition();
            void                                        setPartition(Partition::SharedPtr partition);

            void                                        getDataFromFile(const std::string filename);

            unsigned                                    getNumSubsets() const;
            std::string                                 getSubsetName(unsigned subset) const;

            unsigned                                    getNumTaxa() const;
            const taxon_names_t &                       getTaxonNames() const;

            unsigned                                    getNumPatterns() const;
            npatterns_vect_t                            calcNumPatternsVect() const;
            unsigned                                    getNumPatternsInSubset(unsigned subset) const;
            unsigned                                    getNumStatesForSubset(unsigned subset) const;
            unsigned                                    calcSeqLen() const;
            unsigned                                    calcSeqLenInSubset(unsigned subset) const;
            const data_matrix_t &                       getDataMatrix() const;
            begin_end_pair_t                            getSubsetBeginEnd(unsigned subset) const;
            const pattern_counts_t &                    getPatternCounts() const;
            const monomorphic_vect_t &                  getMonomorphic() const;
            const partition_key_t &                     getPartitionKey() const;

            <span style="color:#0000ff"><strong>std::string                                 createTaxaBlock() const;</strong></span>
            <span style="color:#0000ff"><strong>std::string                                 createTranslateStatement() const;</strong></span>

            void                                        clear();

        private:

            unsigned                                    storeTaxonNames(NxsTaxaBlock * taxaBlock, unsigned taxa_block_index);
            unsigned                                    storeData(unsigned ntax, unsigned nchar, NxsCharactersBlock * charBlock, NxsCharactersBlock::DataTypesEnum datatype);
            unsigned                                    buildSubsetSpecificMaps(unsigned ntaxa, unsigned seqlen, unsigned nsubsets);
            void                                        updatePatternMap(Data::pattern_vect_t & pattern, unsigned subset);
            void                                        compressPatterns();

            Partition::SharedPtr                        _partition;
            pattern_counts_t                            _pattern_counts;
            monomorphic_vect_t                          _monomorphic;
            partition_key_t                             _partition_key;
            pattern_map_vect_t                          _pattern_map_vect;
            taxon_names_t                               _taxon_names;
            data_matrix_t                               _data_matrix;
            subset_end_t                                _subset_end;
    };  

Now add the 2 function bodies below the class declaration but before the namespace-closing right curly bracket.

    inline std::string Data::createTaxaBlock() const {    
        std::string s = "";
        s += "begin taxa;\n";
        s += boost::str(boost::format("  dimensions ntax=%d;\n") % _taxon_names.size());
        s += "  taxlabels\n";
        for (auto nm : _taxon_names) {
            std::string taxon_name = std::regex_replace(nm, std::regex(" "), "_");
            s += "    " + taxon_name + "\n";
            }
        s += "    ;\n";
        s += "end;\n";
        return s;
    } 
    
    inline std::string Data::createTranslateStatement() const {
        std::string s = "";
        s += "  translate\n";
        unsigned t = 1;
        for (auto nm : _taxon_names) {
            std::string taxon_name = std::regex_replace(nm, std::regex(" "), "_");
            s += boost::str(boost::format("    %d %s%s\n") % t % taxon_name % (t &lt; _taxon_names.size() ? "," : ""));
            t++;
            }
        s += "  ;\n";
        return s;
    }    

Adding paramNamesAsString and paramValuesAsString to the model class

Add the bold, blue lines to the Model class declaration in the file model.hpp.

    class Model {   
        
        friend class Likelihood;

        public:
            typedef std::vector&lt;ASRV::SharedPtr&gt;      asrv_vect_t;
            typedef std::vector&lt;QMatrix::SharedPtr&gt;   qmat_vect_t;
            typedef std::vector&lt;unsigned&gt;             subset_sizes_t;
            typedef std::vector&lt;DataType&gt;             subset_datatype_t;
            typedef std::vector&lt;double&gt;               subset_relrate_vect_t;
            typedef std::vector&lt;QMatrix::SharedPtr&gt;   state_freq_params_t;
            typedef std::vector&lt;QMatrix::SharedPtr&gt;   exchangeability_params_t;
            typedef std::vector&lt;QMatrix::SharedPtr&gt;   omega_params_t;
            typedef std::vector&lt;ASRV::SharedPtr&gt;      ratevar_params_t;
            typedef std::vector&lt;ASRV::SharedPtr&gt;      pinvar_params_t;
            typedef boost::shared_ptr&lt;Model&gt;          SharedPtr;
        
                                        Model();
                                        ~Model();

            void                        activate();
            void                        inactivate();
            
            std::string                 describeModel();

            void                        setSubsetDataTypes(const subset_datatype_t & datatype_vect);

            void                        setSubsetRateVar(ASRV::ratevar_ptr_t ratevar, unsigned subset, bool fixed);
            void                        setSubsetPinvar(ASRV::pinvar_ptr_t pinvar, unsigned subset, bool fixed);
            void                        setSubsetExchangeabilities(QMatrix::freq_xchg_ptr_t exchangeabilities, unsigned subset, bool fixed);
            void                        setSubsetStateFreqs(QMatrix::freq_xchg_ptr_t state_frequencies, unsigned subset, bool fixed);
            void                        setSubsetOmega(QMatrix::omega_ptr_t omega, unsigned subset, bool fixed);

            void                        setSubsetRelRates(subset_relrate_vect_t & relrates, bool fixed);
            subset_relrate_vect_t &     getSubsetRelRates();
            bool                        isFixedSubsetRelRates() const;
            double                      calcNormalizingConstantForSubsetRelRates() const;

            void                        setTreeIndex(unsigned i, bool fixed);
            unsigned                    getTreeIndex() const;
            bool                        isFixedTree() const;

            unsigned                    getNumSubsets() const;
            unsigned                    getNumSites() const;
            unsigned                    getSubsetNumSites(unsigned subset) const;
            const QMatrix &             getQMatrix(unsigned subset) const;
            const ASRV &                getASRV(unsigned subset) const;

            void                        setSubsetIsInvarModel(bool is_invar, unsigned subset);
            bool                        getSubsetIsInvarModel(unsigned subset) const;

            void                        setSubsetSizes(const subset_sizes_t nsites_vect);
            subset_sizes_t &            getSubsetSizes();

            void                        setSubsetNumPatterns(const subset_sizes_t npatterns_vect);
            unsigned                    getSubsetNumPatterns(unsigned subset) const;

            void                        setSubsetNumCateg(unsigned ncateg, unsigned subset);
            unsigned                    getSubsetNumCateg(unsigned subset) const;

            state_freq_params_t &       getStateFreqParams();
            exchangeability_params_t &  getExchangeabilityParams();
            omega_params_t &            getOmegaParams();
            ratevar_params_t &          getRateVarParams();
            pinvar_params_t &           getPinvarParams();
        
            int                         setBeagleEigenDecomposition(int beagle_instance, unsigned subset, unsigned instance_subset);
            int                         setBeagleStateFrequencies(int beagle_instance, unsigned subset, unsigned instance_subset);
            int                         setBeagleAmongSiteRateVariationRates(int beagle_instance, unsigned subset, unsigned instance_subset);
            int                         setBeagleAmongSiteRateVariationProbs(int beagle_instance, unsigned subset, unsigned instance_subset);

            <span style="color:#0000ff"><strong>std::string                 paramNamesAsString(std::string sep) const;</strong></span>
            <span style="color:#0000ff"><strong>std::string                 paramValuesAsString(std::string sep) const;</strong></span>

        private:
        
            void                        clear();
        
            unsigned                    _num_subsets;
            unsigned                    _num_sites;
            subset_sizes_t              _subset_sizes;
            subset_sizes_t              _subset_npatterns;
            subset_datatype_t           _subset_datatypes;
            qmat_vect_t                 _qmatrix;
            asrv_vect_t                 _asrv;
        
            bool                        _tree_index;
            bool                        _tree_fixed;

            bool                        _subset_relrates_fixed;
            subset_relrate_vect_t       _subset_relrates;
        
            state_freq_params_t         _state_freq_params;
            exchangeability_params_t    _exchangeability_params;
            omega_params_t              _omega_params;
            ratevar_params_t            _ratevar_params;
            pinvar_params_t             _pinvar_params;
        };  

Add the function body for paramNamesAsString below the class declaration but before the namespace-closing right curly bracket. This creates a row of parameter names (headers) that serve as a key to the values in the columns below. The index of the subset to which each parameter belongs is incorporated into the name, with subset 0 being the first subset (even if there is only one subset).

    inline std::string Model::paramNamesAsString(std::string sep) const {   
        unsigned k;
        std::string s = "";
        if (_num_subsets &gt; 1) {
            for (k = 0; k &lt; _num_subsets; k++) {
                s += boost::str(boost::format("m-%d%s") % k % sep);
            }
        }
        for (k = 0; k &lt; _num_subsets; k++) {
            if (_subset_datatypes[k].isNucleotide()) {
                s += boost::str(boost::format("rAC-%d%srAG-%d%srAT-%d%srCG-%d%srCT-%d%srGT-%d%s") % k % sep % k % sep % k % sep % k % sep % k % sep % k % sep);
                s += boost::str(boost::format("piA-%d%spiC-%d%spiG-%d%spiT-%d%s") % k % sep % k % sep % k % sep % k % sep);
            }
            else if (_subset_datatypes[k].isCodon()) {
                s += boost::str(boost::format("omega-%d%s") % k % sep);
                for (std::string codon : _subset_datatypes[0].getGeneticCode()-&gt;_codons)
                    s += boost::str(boost::format("pi%s-%d%s") % codon % k % sep);
            }
            if (_asrv[k]-&gt;getIsInvarModel()) {
                s += boost::str(boost::format("pinvar-%d%s") % k % sep);
            }
            if (_asrv[k]-&gt;getNumCateg() &gt; 1) {
                s += boost::str(boost::format("ratevar-%d%s") % k % sep);
            }
        }
        return s;
    }   

Add the function body for paramValuesAsString below the class declaration but before the namespace-closing right curly bracket. This function is nearly identical to paramNamesAsString except that it outputs current parameter values rather than parameter names.

    inline std::string Model::paramValuesAsString(std::string sep) const {  
        unsigned k;
        std::string s = "";
        if (_num_subsets &gt; 1) {
            for (k = 0; k &lt; _num_subsets; k++) {
                s += boost::str(boost::format("%.5f%s") % _subset_relrates[k] % sep);
            }
        }
        for (k = 0; k &lt; _num_subsets; k++) {
            if (_subset_datatypes[k].isNucleotide()) {
                QMatrix::freq_xchg_t x = *_qmatrix[k]-&gt;getExchangeabilitiesSharedPtr();
                s += boost::str(boost::format("%.5f%s%.5f%s%.5f%s%.5f%s%.5f%s%.5f%s") % x[0] % sep % x[1] % sep % x[2] % sep % x[3] % sep % x[4] % sep % x[5] % sep);
                QMatrix::freq_xchg_t f = *_qmatrix[k]-&gt;getStateFreqsSharedPtr();
                s += boost::str(boost::format("%.5f%s%.5f%s%.5f%s%.5f%s") % f[0] % sep % f[1] % sep % f[2] % sep % f[3] % sep);
            }
            else if (_subset_datatypes[k].isCodon()) {
                s += boost::str(boost::format("%.5f%s") % _qmatrix[k]-&gt;getOmega() % sep);
                QMatrix::freq_xchg_t f = *_qmatrix[k]-&gt;getStateFreqsSharedPtr();
                for (unsigned m = 0; m &lt; _subset_datatypes[0].getNumStates(); m++)
                    s += boost::str(boost::format("%.5f%s") % f[m] % sep);
            }
            if (_asrv[k]-&gt;getIsInvarModel()) {
                s += boost::str(boost::format("%.5f%s") % _asrv[k]-&gt;getPinvar() % sep);
            }
            if (_asrv[k]-&gt;getNumCateg() &gt; 1) {
                s += boost::str(boost::format("%.5f%s") % _asrv[k]-&gt;getRateVar() % sep);
            }
        }
        return s;
    }   

< 15.1 | 15.2 | 15.3 >