12.2 Rescaling in BeagleLib

(Win version)

< 12.1 | 12.2 | 13.0 >

To add rescaling, we will need to modify several BeagleLib function calls inside our Likelihood class.

Add a data member to control whether scaling occurs

Add new data member _underflow_scaling (and a new public member function (useUnderflowScaling) to set its value) to the class declaration .

    class Likelihood {  
        public:
                                                    Likelihood();
                                                    ~Likelihood();

            void                                    setRooted(bool is_rooted);
            void                                    setPreferGPU(bool prefer_gpu);
            void                                    setAmbiguityEqualsMissing(bool ambig_equals_missing);
        
            bool                                    usingStoredData() const;
            void                                    useStoredData(bool using_data);
            <span style="color:#0000ff"><strong>void                                    useUnderflowScaling(bool do_scaling);</strong></span>

            std::string                             beagleLibVersion() const;
            std::string                             availableResources() const;
            std::string                             usedResources() const;

            void                                    initBeagleLib();
            void                                    finalizeBeagleLib(bool use_exceptions);

            double                                  calcLogLikelihood(Tree::SharedPtr t);

            Data::SharedPtr                         getData();
            void                                    setData(Data::SharedPtr d);

            Model::SharedPtr                        getModel();
            void                                    setModel(Model::SharedPtr m);

            void                                    clear();
            
            unsigned                                calcNumEdgesInFullyResolvedTree() const;
            unsigned                                calcNumInternalsInFullyResolvedTree() const;

        private:
        
            struct InstanceInfo {
                int handle;
                int resourcenumber;
                std::string resourcename;
                unsigned nstates;
                unsigned nratecateg;
                unsigned npatterns;
                unsigned partial_offset;
                unsigned tmatrix_offset;
                bool invarmodel;
                std::vector&lt;unsigned&gt; subsets;
                
                InstanceInfo() : handle(-1), resourcenumber(-1), resourcename(""), nstates(0), nratecateg(0), npatterns(0), partial_offset(0), tmatrix_offset(0), invarmodel(false) {}
            };

            typedef std::pair&lt;unsigned, int&gt;        instance_pair_t;

            unsigned                                getScalerIndex(Node * nd, InstanceInfo & info) const;
            unsigned                                getPartialIndex(Node * nd, InstanceInfo & info) const;
            unsigned                                getTMatrixIndex(Node * nd, InstanceInfo & info, unsigned subset_index) const;
            void                                    updateInstanceMap(instance_pair_t & p, unsigned subset);
            void                                    newInstance(unsigned nstates, int nrates, std::vector&lt;unsigned&gt; & subset_indices);
            void                                    setTipStates();
            void                                    setTipPartials();
            void                                    setPatternPartitionAssignments();
            void                                    setPatternWeights();
            void                                    setAmongSiteRateHeterogenetity();
            void                                    setModelRateMatrix();
            void                                    addOperation(InstanceInfo & info, Node * nd, Node * lchild, Node * rchild, unsigned subset_index);
            void                                    queuePartialsRecalculation(Node * nd, Node * lchild, Node * rchild);
            void                                    queueTMatrixRecalculation(Node * nd);
            void                                    defineOperations(Tree::SharedPtr t);
            void                                    updateTransitionMatrices();
            void                                    calculatePartials();
            double                                  calcInstanceLogLikelihood(InstanceInfo & inst, Tree::SharedPtr t);


            std::vector&lt;InstanceInfo&gt;               _instances;
            std::map&lt;int, std::string&gt;              _beagle_error;
            std::map&lt;int, std::vector&lt;int&gt; &gt;        _operations;
            std::map&lt;int, std::vector&lt;int&gt; &gt;        _pmatrix_index;
            std::map&lt;int, std::vector&lt;double&gt; &gt;     _edge_lengths;
            std::map&lt;int, std::vector&lt;int&gt; &gt;        _eigen_indices;
            std::map&lt;int, std::vector&lt;int&gt; &gt;        _category_rate_indices;
            double                                  _relrate_normalizing_constant;

            std::vector&lt;int&gt;                        _subset_indices;
            std::vector&lt;int&gt;                        _parent_indices;
            std::vector&lt;int&gt;                        _child_indices;
            std::vector&lt;int&gt;                        _tmatrix_indices;
            std::vector&lt;int&gt;                        _weights_indices;
            std::vector&lt;int&gt;                        _freqs_indices;
            std::vector&lt;int&gt;                        _scaling_indices;
        
            Model::SharedPtr                        _model;

            Data::SharedPtr                         _data;
            unsigned                                _ntaxa;
            bool                                    _rooted;
            bool                                    _prefer_gpu;
            bool                                    _ambiguity_equals_missing;
            <span style="color:#0000ff"><strong>bool                                    _underflow_scaling;</strong></span>
            bool                                    _using_data;

        public:
            typedef std::shared_ptr&lt; Likelihood &gt;   SharedPtr;
    };
    

Set the new data member to false in the clear function. We will turn scaling on or off using a program option, but the default will be to not scale.

    inline void Likelihood::clear() { 
        finalizeBeagleLib(true);
        
        _ntaxa                      = 0;
        _rooted                     = false;
        _prefer_gpu                 = false;
        _ambiguity_equals_missing   = true;
        <span style="color:#0000ff"><strong>_underflow_scaling          = false;</strong></span>
        _using_data                 = true;
        _data                       = nullptr;
        
        _operations.clear();
        _pmatrix_index.clear();
        _edge_lengths.clear();
        _eigen_indices.clear();
        _category_rate_indices.clear();
        _relrate_normalizing_constant = 1.0;
        _subset_indices.assign(1, 0);
        _parent_indices.assign(1, 0);
        _child_indices.assign(1, 0);
        _tmatrix_indices.assign(1, 0);
        _weights_indices.assign(1, 0);
        _freqs_indices.assign(1, 0);
        _scaling_indices.assign(1, 0);
        
        _model = Model::SharedPtr(new Model());        

        // Store BeagleLib error codes so that useful
        // error messages may be provided to the user
        _beagle_error.clear();
        _beagle_error[0]  = std::string("success");
        _beagle_error[-1] = std::string("unspecified error");
        _beagle_error[-2] = std::string("not enough memory could be allocated");
        _beagle_error[-3] = std::string("unspecified exception");
        _beagle_error[-4] = std::string("the instance index is out of range, or the instance has not been created");
        _beagle_error[-5] = std::string("one of the indices specified exceeded the range of the array");
        _beagle_error[-6] = std::string("no resource matches requirements");
        _beagle_error[-7] = std::string("no implementation matches requirements");
        _beagle_error[-8] = std::string("floating-point range exceeded");
    } 

Add the body of the new member function somewhere after the class declaration but before the namespace-closing right curly bracket.

    inline void Likelihood::useUnderflowScaling(bool do_scaling) { 
        _underflow_scaling = do_scaling;
    } 

Tell BeagleLib to create scale buffers

In the Likelihood::newInstance function, add BEAGLE_FLAG_SCALING_MANUAL and BEAGLE_FLAG_SCALERS_LOG to the preference flags and tell BeagleLib to create num_internals + 1 scaling buffers (i.e. arrays). Each scaling buffer has one element for each pattern in the data, plus an extra element to store the cumulative scaling factors. If scaling is done at each internal node, there needs to be at least the same number of scaling buffers as there are internal nodes. The items to add or change are highlighted in blue.

    inline void Likelihood::newInstance(unsigned nstates, int nrates, std::vector&lt;unsigned&gt; & subset_indices) { 
        unsigned num_subsets = (unsigned)subset_indices.size();
        
        bool is_invar_model = (nrates &lt; 0 ? true : false);
        unsigned ngammacat = (unsigned)(is_invar_model ? -nrates : nrates);
        
        unsigned num_patterns = 0;
        for (auto s : subset_indices) {
            num_patterns += _data-&gt;getNumPatternsInSubset(s);
        }
        
        unsigned num_internals = calcNumInternalsInFullyResolvedTree();

        // add 1 to num_edges so that subroot node will have a tmatrix, root tip's tmatrix is never used
        unsigned num_edges = calcNumEdgesInFullyResolvedTree();
        unsigned num_nodes = num_edges + 1;
        unsigned num_transition_probs = num_nodes*num_subsets;
        
        long requirementFlags = 0;

        long preferenceFlags = BEAGLE_FLAG_PRECISION_SINGLE | BEAGLE_FLAG_THREADING_CPP;
        <span style="color:#0000ff"><strong>if (_underflow_scaling) {</strong></span>
            <span style="color:#0000ff"><strong>preferenceFlags |= BEAGLE_FLAG_SCALING_MANUAL;</strong></span>
            <span style="color:#0000ff"><strong>preferenceFlags |= BEAGLE_FLAG_SCALERS_LOG;</strong></span>
        <span style="color:#0000ff"><strong>}</strong></span>
        if (_prefer_gpu)
            preferenceFlags |= BEAGLE_FLAG_PROCESSOR_GPU;
        else
            preferenceFlags |= BEAGLE_FLAG_PROCESSOR_CPU;
        
        BeagleInstanceDetails instance_details;
        unsigned npartials = num_internals + _ntaxa;
        <span style="color:#0000ff"><strong>unsigned nscalers = num_internals;  // one scale buffer for every internal node</strong></span>
        unsigned nsequences = 0;
        if (_ambiguity_equals_missing) {
            npartials -= _ntaxa;
            nsequences += _ntaxa;
        }
        
        int inst = beagleCreateInstance(
             _ntaxa,                                    // tips
             npartials,                                 // partials
             nsequences,                                // sequences
             nstates,                                   // states
             num_patterns,                              // patterns (total across all subsets that use this instance)
             num_subsets,                               // models (one for each distinct eigen decomposition)
             num_subsets*num_transition_probs,          // transition matrices (one for each node in each subset)
             ngammacat,                                 // rate categories
             <span style="color:#0000ff"><strong>(_underflow_scaling ? nscalers + 1 : 0),   // scale buffers  (+1 is for the cumulative scaler at index 0)</strong></span>
             NULL,                                      // resource restrictions
             0,                                         // length of resource list
             preferenceFlags,                           // preferred flags
             requirementFlags,                          // required flags
             &instance_details);                        // pointer for details
        
        if (inst &lt; 0) {
            // beagleCreateInstance returns one of the following:
            //   valid instance (0, 1, 2, ...)
            //   error code (negative integer)
            throw XStrom(boost::str(boost::format("Likelihood init function failed to create BeagleLib instance (BeagleLib error code was %d)") % _beagle_error[inst]));
        }
        
        InstanceInfo info;
        info.handle         = inst;
        info.resourcenumber = instance_details.resourceNumber;
        info.resourcename   = instance_details.resourceName;
        info.nstates        = nstates;
        info.nratecateg     = ngammacat;
        info.invarmodel     = is_invar_model;
        info.subsets        = subset_indices;
        info.npatterns      = num_patterns;
        info.partial_offset = num_internals;
        info.tmatrix_offset = num_nodes;
        _instances.push_back(info);
    }   

Modify getScalerIndex

Modify the Likelihood::getScalerIndex function to tell BeagleLib which scaling buffer to use when computing the partials for a given internal node. We will reserve the first scaling buffer element to hold the cumulative sum of log scalers for each pattern, and because internal nodes are numbered starting with _ntaxa, we need to subtract _ntaxa from the internal node number and add 1 to get the index of the scaler buffer to use.

    inline unsigned Likelihood::getScalerIndex(Node * nd, InstanceInfo & info) const {  
        <span style="color:#0000ff"><strong>unsigned sindex = BEAGLE_OP_NONE;</strong></span>
        <span style="color:#0000ff"><strong>if (_underflow_scaling)</strong></span>
            <span style="color:#0000ff"><strong>sindex = nd-&gt;_number - _ntaxa + 1; // +1 to skip the cumulative scaler vector</strong></span>
        <span style="color:#0000ff"><strong>return sindex;</strong></span>
    }   

Accumulate scalers in calcInstanceLogLikelihood

We’ve now provided BeagleLib with the appropriate number of scaling buffers, and, when operations are added to recalculate partials in addOperation, scaling will be done because getScalerIndex will return the appropriate scaling buffer index rather than BEAGLE_OP_NONE. All that is left is to harvest the scalers at each internal node and accumulate them for the final log-likelihood calculation.

The large section in blue in the code below first creates a list (internal_node_scaler_indices) of all scaling buffer indices used (which is just the scaler index for every internal node).

If the instance is managing just one data subset (unpartitioned case), beagleResetScaleFactors is used to zero out all elements of the cumulative_scale_index, which is the first (0th) scaler buffer and beagleAccumulateScaleFactors is used to sum up the log scalers (using scaler indices supplied via internal_node_scaler_indices).

If the instance is managing more than one data subset (partitioned case), beagleResetScaleFactorsByPartition is used to zero out all elements of the cumulative_scale_index, which is the first (0th) scaler buffer and beagleAccumulateScaleFactorsByPartition is used to sum up the log scalers (using scaler indices supplied via internal_node_scaler_indices).

Because we are using just one cumulative scaling buffer for all site patterns regardless of their subset of origin, it is really not necessary to use the ByPartition versions of these functions, so you might save a tiny bit of computational effort by using the simpler versions for the partitioned case. The ByPartition versions simply limit their activity (in the cumulative scaler buffer) to patterns from subset s, whereas beagleResetScaleFactors and beagleResetScaleFactors affect the entire cumulative scaler buffer.

The other highlighted lines simply specify that the scaler buffer with index 0 is where the cumulative scaling factors are stored when underflow scaling is being done.

    inline double Likelihood::calcInstanceLogLikelihood(InstanceInfo & info, Tree::SharedPtr t) {   
        int code = 0;
        unsigned nsubsets = (unsigned)info.subsets.size();
        assert(nsubsets &gt; 0);

        // Assuming there are as many transition matrices as there are edge lengths
        assert(_pmatrix_index[info.handle].size() == _edge_lengths[info.handle].size());

        int state_frequency_index  = 0;
        int category_weights_index = 0;
        <span style="color:#0000ff"><strong>int cumulative_scale_index = (_underflow_scaling ? 0 : BEAGLE_OP_NONE);</strong></span>
        int child_partials_index   = getPartialIndex(t-&gt;_root, info);
        int parent_partials_index  = getPartialIndex(t-&gt;_preorder[0], info);
        int parent_tmatrix_index   = getTMatrixIndex(t-&gt;_preorder[0], info, 0);

        // storage for results of the likelihood calculation
        std::vector&lt;double&gt; subset_log_likelihoods(nsubsets, 0.0);
        double log_likelihood = 0.0;

        <span style="color:#0000ff"><strong>if (_underflow_scaling) {</strong></span>
            <span style="color:#0000ff"><strong>// Create vector of all scaling vector indices in current use</strong></span>
            <span style="color:#0000ff"><strong>std::vector&lt;int&gt; internal_node_scaler_indices;</strong></span>
            <span style="color:#0000ff"><strong>for (auto nd : t-&gt;_preorder) {</strong></span>
                <span style="color:#0000ff"><strong>if (nd-&gt;_left_child) {</strong></span>
                    <span style="color:#0000ff"><strong>unsigned s = getScalerIndex(nd, info);</strong></span>
                    <span style="color:#0000ff"><strong>internal_node_scaler_indices.push_back(s);</strong></span>
                <span style="color:#0000ff"><strong>}</strong></span>
            <span style="color:#0000ff"><strong>}</strong></span>
            
<span style="color:#0000ff"><strong></strong></span>
            <span style="color:#0000ff"><strong>if (nsubsets == 1) {</strong></span>
                <span style="color:#0000ff"><strong>code = beagleResetScaleFactors(info.handle, cumulative_scale_index);</strong></span>
                <span style="color:#0000ff"><strong>if (code != 0)</strong></span>
                    <span style="color:#0000ff"><strong>throw XStrom(boost::str(boost::format("failed to reset scale factors in calcInstanceLogLikelihood. BeagleLib error code was %d (%s)") % code % _beagle_error[code]));</strong></span>

<span style="color:#0000ff"><strong></strong></span>
                <span style="color:#0000ff"><strong>code = beagleAccumulateScaleFactors(</strong></span>
                     <span style="color:#0000ff"><strong>info.handle,</strong></span>
                     <span style="color:#0000ff"><strong>&internal_node_scaler_indices[0],</strong></span>
                     <span style="color:#0000ff"><strong>(int)internal_node_scaler_indices.size(),</strong></span>
                     <span style="color:#0000ff"><strong>cumulative_scale_index);</strong></span>
                <span style="color:#0000ff"><strong>if (code != 0)</strong></span>
                    <span style="color:#0000ff"><strong>throw XStrom(boost::str(boost::format("failed to accumulate scale factors in calcInstanceLogLikelihood. BeagleLib error code was %d (%s)") % code % _beagle_error[code]));</strong></span>
            <span style="color:#0000ff"><strong>}</strong></span>
            <span style="color:#0000ff"><strong>else {</strong></span>
                <span style="color:#0000ff"><strong>for (unsigned s = 0; s &lt; nsubsets; ++s) {</strong></span>
                    <span style="color:#0000ff"><strong>code = beagleResetScaleFactorsByPartition(info.handle, cumulative_scale_index, s);</strong></span>
                    <span style="color:#0000ff"><strong>if (code != 0)</strong></span>
                        <span style="color:#0000ff"><strong>throw XStrom(boost::str(boost::format("failed to reset scale factors for subset %d in calcInstanceLogLikelihood. BeagleLib error code was %d (%s)") % s % code % _beagle_error[code]));</strong></span>
                        
<span style="color:#0000ff"><strong></strong></span>
                    <span style="color:#0000ff"><strong>code = beagleAccumulateScaleFactorsByPartition(</strong></span>
                        <span style="color:#0000ff"><strong>info.handle,</strong></span>
                        <span style="color:#0000ff"><strong>&internal_node_scaler_indices[0],</strong></span>
                        <span style="color:#0000ff"><strong>(int)internal_node_scaler_indices.size(),</strong></span>
                        <span style="color:#0000ff"><strong>cumulative_scale_index,</strong></span>
                        <span style="color:#0000ff"><strong>s);</strong></span>
                    <span style="color:#0000ff"><strong>if (code != 0)</strong></span>
                        <span style="color:#0000ff"><strong>throw XStrom(boost::str(boost::format("failed to acccumulate scale factors for subset %d in calcInstanceLogLikelihood. BeagleLib error code was %d (%s)") % s % code % _beagle_error[code]));</strong></span>
                <span style="color:#0000ff"><strong>}</strong></span>
            <span style="color:#0000ff"><strong>}</strong></span>
        <span style="color:#0000ff"><strong>}</strong></span>

        if (nsubsets &gt; 1) {
            _parent_indices.assign(nsubsets, parent_partials_index);
            _child_indices.assign(nsubsets, child_partials_index);
            _weights_indices.assign(nsubsets, category_weights_index);
            _scaling_indices.resize(nsubsets);
            _subset_indices.resize(nsubsets);
            _freqs_indices.resize(nsubsets);
            _tmatrix_indices.resize(nsubsets);

            for (unsigned s = 0; s &lt; nsubsets; s++) {
                <span style="color:#0000ff"><strong>_scaling_indices[s]  = (_underflow_scaling ? 0 : BEAGLE_OP_NONE);</strong></span>
                _subset_indices[s]  = s;
                _freqs_indices[s]   = s;
                _tmatrix_indices[s] = getTMatrixIndex(t-&gt;_preorder[0], info, s); //index_focal_child + s*tmatrix_skip;
            }
            
            code = beagleCalculateEdgeLogLikelihoodsByPartition(
                info.handle,                 // instance number
                &_parent_indices[0],         // indices of parent partialsBuffers
                &_child_indices[0],          // indices of child partialsBuffers
                &_tmatrix_indices[0],        // transition probability matrices for this edge
                NULL,                        // first derivative matrices
                NULL,                        // second derivative matrices
                &_weights_indices[0],        // weights to apply to each partialsBuffer
                &_freqs_indices[0],          // state frequencies for each partialsBuffer
                &_scaling_indices[0],        // scaleBuffers containing accumulated factors
                &_subset_indices[0],         // indices of subsets
                nsubsets,                    // partition subset count
                1,                           // number of distinct eigen decompositions
                &subset_log_likelihoods[0],  // address of vector of log likelihoods (one for each subset)
                &log_likelihood,             // destination for resulting log likelihood
                NULL,                        // destination for vector of first derivatives (one for each subset)
                NULL,                        // destination for first derivative
                NULL,                        // destination for vector of second derivatives (one for each subset)
                NULL);                       // destination for second derivative
        }
        else {
            code = beagleCalculateEdgeLogLikelihoods(
                info.handle,                 // instance number
                &parent_partials_index,      // indices of parent partialsBuffers
                &child_partials_index,       // indices of child partialsBuffers
                &parent_tmatrix_index,       // transition probability matrices for this edge
                NULL,                        // first derivative matrices
                NULL,                        // second derivative matrices
                &category_weights_index,     // weights to apply to each partialsBuffer
                &state_frequency_index,      // state frequencies for each partialsBuffer
                &cumulative_scale_index,     // scaleBuffers containing accumulated factors
                1,                           // Number of partialsBuffer
                &log_likelihood,             // destination for log likelihood
                NULL,                        // destination for first derivative
                NULL);                       // destination for second derivative
        }
        
        // ... 

Modify Strom to add program option for scaling

Add a new data member to the Strom class named _use_underflow_scaling:

    class Strom {   
        public:
                                                    Strom();
                                                    ~Strom();

            void                                    clear();
            void                                    processCommandLineOptions(int argc, const char * argv[]);
            void                                    run();
        
        private:
            bool                                    processAssignmentString(const std::string & which, const std::string & definition);
            void                                    handleAssignmentStrings(const boost::program_options::variables_map & vm, std::string label, const std::vector&lt;std::string&gt; & definitions, std::string default_definition);
            bool                                    splitAssignmentString(const std::string & definition, std::vector&lt;std::string&gt; & vector_of_subset_names, std::vector&lt;double&gt;  & vector_of_values);

            double                                  _expected_log_likelihood;

            std::string                             _data_file_name;
            std::string                             _tree_file_name;
            Partition::SharedPtr                    _partition;

            Data::SharedPtr                         _data;
            Model::SharedPtr                        _model;
            Likelihood::SharedPtr                   _likelihood;
            TreeSummary::SharedPtr                  _tree_summary;

            bool                                    _use_gpu;
            bool                                    _ambig_missing;
            <span style="color:#0000ff"><strong>bool                                    _use_underflow_scaling;</strong></span>

            static std::string                      _program_name;
            static unsigned                         _major_version;
            static unsigned                         _minor_version;

    };
    

Initialize it to false in the clear function:

    inline void Strom::clear() {    
        _data_file_name             = "";
        _tree_file_name             = "";
        _tree_summary               = nullptr;
        _partition.reset(new Partition());
        _use_gpu                    = true;
        _ambig_missing              = true;
        _model.reset(new Model());
        _expected_log_likelihood    = 0.0;
        _data                       = nullptr; 
        _likelihood                 = nullptr;
        <span style="color:#0000ff"><strong>_use_underflow_scaling      = false;</strong></span>
    }   

Add a program option to set it in processCommandLineOptions:

    inline void Strom::processCommandLineOptions(int argc, const char * argv[]) {   
        std::vector&lt;std::string&gt; partition_statefreq;
        std::vector&lt;std::string&gt; partition_rmatrix;
        std::vector&lt;std::string&gt; partition_omega;
        std::vector&lt;std::string&gt; partition_ratevar;
        std::vector&lt;std::string&gt; partition_pinvar;
        std::vector&lt;std::string&gt; partition_ncateg;
        std::vector&lt;std::string&gt; partition_subsets;
        std::vector&lt;std::string&gt; partition_relrates;
        std::vector&lt;std::string&gt; partition_tree;
        boost::program_options::variables_map vm;
        boost::program_options::options_description desc("Allowed options");
        desc.add_options()
            ("help,h", "produce help message")
            ("version,v", "show program version")
            ("datafile,d",  boost::program_options::value(&_data_file_name)-&gt;required(), "name of a data file in NEXUS format")
            ("treefile,t",  boost::program_options::value(&_tree_file_name)-&gt;required(), "name of a tree file in NEXUS format")
            ("subset",  boost::program_options::value(&partition_subsets), "a string defining a partition subset, e.g. 'first:1-1234\3' or 'default[codon:standard]:1-3702'")
            ("ncateg,c", boost::program_options::value(&partition_ncateg), "number of categories in the discrete Gamma rate heterogeneity model")
            ("statefreq", boost::program_options::value(&partition_statefreq), "a string defining state frequencies for one or more data subsets, e.g. 'first,second:0.1,0.2,0.3,0.4'")
            ("omega", boost::program_options::value(&partition_omega), "a string defining the nonsynonymous/synonymous rate ratio omega for one or more data subsets, e.g. 'first,second:0.1'")
            ("rmatrix", boost::program_options::value(&partition_rmatrix), "a string defining the rmatrix for one or more data subsets, e.g. 'first,second:1,2,1,1,2,1'")
            ("ratevar", boost::program_options::value(&partition_ratevar), "a string defining the among-site rate variance for one or more data subsets, e.g. 'first,second:2.5'")
            ("pinvar", boost::program_options::value(&partition_pinvar), "a string defining the proportion of invariable sites for one or more data subsets, e.g. 'first,second:0.2'")
            ("relrate", boost::program_options::value(&partition_relrates), "a string defining the (unnormalized) relative rates for all data subsets (e.g. 'default:3,1,6').")
            ("tree", boost::program_options::value(&partition_tree), "the index of the tree in the tree file (first tree has index = 1)")
            ("expectedLnL", boost::program_options::value(&_expected_log_likelihood)-&gt;default_value(0.0), "log likelihood expected")
            ("gpu",           boost::program_options::value(&_use_gpu)-&gt;default_value(true),                "use GPU if available")
            ("ambigmissing",  boost::program_options::value(&_ambig_missing)-&gt;default_value(true),          "treat all ambiguities as missing data")
            <span style="color:#0000ff"><strong>("underflowscaling",  boost::program_options::value(&_use_underflow_scaling)-&gt;default_value(false),          "scale site-likelihoods to prevent underflow (slower but safer)")</strong></span>
        ;
        // ... 

Finally, call the function useUnderflowScaling to tell the Likelihood object whether the user wishes to use scaling:

    inline void Strom::run() {  
        std::cout &lt;&lt; "Starting..." &lt;&lt; std::endl;
        std::cout &lt;&lt; "Current working directory: " &lt;&lt; boost::filesystem::current_path() &lt;&lt; std::endl;

        try {
            std::cout &lt;&lt; "\n*** Reading and storing the data in the file " &lt;&lt; _data_file_name &lt;&lt; std::endl;
            _data = Data::SharedPtr(new Data());
            _data-&gt;setPartition(_partition);
            _data-&gt;getDataFromFile(_data_file_name);
            
            _model-&gt;setSubsetNumPatterns(_data-&gt;calcNumPatternsVect());
            _model-&gt;setSubsetSizes(_partition-&gt;calcSubsetSizes());
            _model-&gt;activate();

            // Report information about data partition subsets
            unsigned nsubsets = _data-&gt;getNumSubsets();
            std::cout &lt;&lt; "\nNumber of taxa: " &lt;&lt; _data-&gt;getNumTaxa() &lt;&lt; std::endl;
            std::cout &lt;&lt; "Number of partition subsets: " &lt;&lt; nsubsets &lt;&lt; std::endl;
            for (unsigned subset = 0; subset &lt; nsubsets; subset++) {
                DataType dt = _partition-&gt;getDataTypeForSubset(subset);
                std::cout &lt;&lt; "  Subset " &lt;&lt; (subset+1) &lt;&lt; " (" &lt;&lt; _data-&gt;getSubsetName(subset) &lt;&lt; ")" &lt;&lt; std::endl;
                std::cout &lt;&lt; "    data type: " &lt;&lt; dt.getDataTypeAsString() &lt;&lt; std::endl;
                std::cout &lt;&lt; "    sites:     " &lt;&lt; _data-&gt;calcSeqLenInSubset(subset) &lt;&lt; std::endl;
                std::cout &lt;&lt; "    patterns:  " &lt;&lt; _data-&gt;getNumPatternsInSubset(subset) &lt;&lt; std::endl;
                std::cout &lt;&lt; "    ambiguity: " &lt;&lt; (_ambig_missing || dt.isCodon() ? "treated as missing data (faster)" : "handled appropriately (slower)") &lt;&lt; std::endl;
                }

            std::cout &lt;&lt; "\n*** Resources available to BeagleLib " &lt;&lt; _likelihood-&gt;beagleLibVersion() &lt;&lt; ":\n";
            std::cout &lt;&lt; _likelihood-&gt;availableResources() &lt;&lt; std::endl;

            std::cout &lt;&lt; "\n*** Creating the likelihood calculator" &lt;&lt; std::endl;
            _likelihood = Likelihood::SharedPtr(new Likelihood());
            _likelihood-&gt;setPreferGPU(_use_gpu);
            _likelihood-&gt;setAmbiguityEqualsMissing(_ambig_missing);
            _likelihood-&gt;setData(_data);
            <span style="color:#0000ff"><strong>_likelihood-&gt;useUnderflowScaling(_use_underflow_scaling);</strong></span>

            std::cout &lt;&lt; "\n*** Model description" &lt;&lt; std::endl;
            std::cout &lt;&lt; _model-&gt;describeModel() &lt;&lt; std::endl;
            _likelihood-&gt;setModel(_model);

            _likelihood-&gt;initBeagleLib();

            std::cout &lt;&lt; "\n*** Reading and storing the first tree in the file " &lt;&lt; _tree_file_name &lt;&lt; std::endl;
            _tree_summary = TreeSummary::SharedPtr(new TreeSummary());
            _tree_summary-&gt;readTreefile(_tree_file_name, 0);
            Tree::SharedPtr tree = _tree_summary-&gt;getTree(0);
            
            if (tree-&gt;numLeaves() != _data-&gt;getNumTaxa())
                throw XStrom(boost::format("Number of taxa in tree (%d) does not equal the number of taxa in the data matrix (%d)") % tree-&gt;numLeaves() % _data-&gt;getNumTaxa());

            std::cout &lt;&lt; "\n*** Calculating the likelihood of the tree" &lt;&lt; std::endl;
            double lnL = _likelihood-&gt;calcLogLikelihood(tree);
            std::cout &lt;&lt; boost::str(boost::format("log likelihood = %.5f") % lnL) &lt;&lt; std::endl;
            
            if (_expected_log_likelihood != 0.0) 
                std::cout &lt;&lt; boost::str(boost::format("      (expecting %.3f)") % _expected_log_likelihood) &lt;&lt; std::endl;
            
        }
        catch (XStrom & x) {
            std::cerr &lt;&lt; "Strom encountered a problem:\n  " &lt;&lt; x.what() &lt;&lt; std::endl;
        }

        std::cout &lt;&lt; "\nFinished!" &lt;&lt; std::endl;
    }   

Run the program again

If you now run your program using the strom.conf file below (which sets underflowscaling to yes to turn on scaling), you should find that it computes the log-likelihood correctly.

datafile         = rbcl738.nex
treefile         = rbcl738nj.tre
rmatrix          = default: 0.08394222, 0.34116704, 0.03603322, 0.15737940, 0.30297095, 0.07850717
statefreq        = default: 0.309769, 0.163380, 0.121023, 0.405828
ratevar          = default:1.933185251
ncateg           = default:4
<span style="color:#0000ff"><strong>underflowscaling = yes</strong></span>
expectedLnL      = -144730.75

Tradeoffs

Adding rescaling has added an additional computational burden to our likelihood calculation. You can lessen the burden by not rescaling at every internal node. All that is needed to avoid rescaling for a particular internal node is to modify the operation association with that internal node, specifying BEAGLE_OP_NONE instead of the node number for the 3rd element of the operation. Skimping too much, however, runs the risk of overflow. For now, we will rescale at every internal node (if the user specifies that scaling is to be done) just to be safe.