To add rescaling, we will need to modify several BeagleLib function calls inside our Likelihood class. Add a new data member _underflow_scaling (and a new public member function, useUnderflowScaling, to set its value) to the class declaration.
class Likelihood {
public:
Likelihood();
~Likelihood();
void setRooted(bool is_rooted);
void setPreferGPU(bool prefer_gpu);
void setAmbiguityEqualsMissing(bool ambig_equals_missing);
bool usingStoredData() const;
void useStoredData(bool using_data);
void useUnderflowScaling(bool do_scaling);
std::string beagleLibVersion() const;
std::string availableResources() const;
std::string usedResources() const;
void initBeagleLib();
void finalizeBeagleLib(bool use_exceptions);
double calcLogLikelihood(Tree::SharedPtr t);
Data::SharedPtr getData();
void setData(Data::SharedPtr d);
Model::SharedPtr getModel();
void setModel(Model::SharedPtr m);
void clear();
unsigned calcNumEdgesInFullyResolvedTree() const;
unsigned calcNumInternalsInFullyResolvedTree() const;
private:
struct InstanceInfo {
int handle;
int resourcenumber;
std::string resourcename;
unsigned nstates;
unsigned nratecateg;
unsigned npatterns;
unsigned partial_offset;
unsigned tmatrix_offset;
bool invarmodel;
std::vector<unsigned> subsets;
InstanceInfo() : handle(-1), resourcenumber(-1), resourcename(""), nstates(0), nratecateg(0), npatterns(0), partial_offset(0), tmatrix_offset(0), invarmodel(false) {}
};
typedef std::pair<unsigned, int> instance_pair_t;
unsigned getScalerIndex(Node * nd, InstanceInfo & info) const;
unsigned getPartialIndex(Node * nd, InstanceInfo & info) const;
unsigned getTMatrixIndex(Node * nd, InstanceInfo & info, unsigned subset_index) const;
void updateInstanceMap(instance_pair_t & p, unsigned subset);
void newInstance(unsigned nstates, int nrates, std::vector<unsigned> & subset_indices);
void setTipStates();
void setTipPartials();
void setPatternPartitionAssignments();
void setPatternWeights();
void setAmongSiteRateHeterogenetity();
void setModelRateMatrix();
void addOperation(InstanceInfo & info, Node * nd, Node * lchild, Node * rchild, unsigned subset_index);
void queuePartialsRecalculation(Node * nd, Node * lchild, Node * rchild);
void queueTMatrixRecalculation(Node * nd);
void defineOperations(Tree::SharedPtr t);
void updateTransitionMatrices();
void calculatePartials();
double calcInstanceLogLikelihood(InstanceInfo & inst, Tree::SharedPtr t);
std::vector<InstanceInfo> _instances;
std::map<int, std::string> _beagle_error;
std::map<int, std::vector<int> > _operations;
std::map<int, std::vector<int> > _pmatrix_index;
std::map<int, std::vector<double> > _edge_lengths;
std::map<int, std::vector<int> > _eigen_indices;
std::map<int, std::vector<int> > _category_rate_indices;
double _relrate_normalizing_constant;
std::vector<int> _subset_indices;
std::vector<int> _parent_indices;
std::vector<int> _child_indices;
std::vector<int> _tmatrix_indices;
std::vector<int> _weights_indices;
std::vector<int> _freqs_indices;
std::vector<int> _scaling_indices;
Model::SharedPtr _model;
Data::SharedPtr _data;
unsigned _ntaxa;
bool _rooted;
bool _prefer_gpu;
bool _ambiguity_equals_missing;
bool _underflow_scaling;
bool _using_data;
public:
typedef std::shared_ptr< Likelihood > SharedPtr;
};
Set the new data member to false in the clear function. We will turn scaling on or off using a program option, but the default will be to not scale.
inline void Likelihood::clear() {
finalizeBeagleLib(true);
_ntaxa = 0;
_rooted = false;
_prefer_gpu = false;
_ambiguity_equals_missing = true;
_underflow_scaling = false;
_using_data = true;
_data = nullptr;
_operations.clear();
_pmatrix_index.clear();
_edge_lengths.clear();
_eigen_indices.clear();
_category_rate_indices.clear();
_relrate_normalizing_constant = 1.0;
_subset_indices.assign(1, 0);
_parent_indices.assign(1, 0);
_child_indices.assign(1, 0);
_tmatrix_indices.assign(1, 0);
_weights_indices.assign(1, 0);
_freqs_indices.assign(1, 0);
_scaling_indices.assign(1, 0);
_model = Model::SharedPtr(new Model());
// Store BeagleLib error codes so that useful
// error messages may be provided to the user
_beagle_error.clear();
_beagle_error[0] = std::string("success");
_beagle_error[-1] = std::string("unspecified error");
_beagle_error[-2] = std::string("not enough memory could be allocated");
_beagle_error[-3] = std::string("unspecified exception");
_beagle_error[-4] = std::string("the instance index is out of range, or the instance has not been created");
_beagle_error[-5] = std::string("one of the indices specified exceeded the range of the array");
_beagle_error[-6] = std::string("no resource matches requirements");
_beagle_error[-7] = std::string("no implementation matches requirements");
_beagle_error[-8] = std::string("floating-point range exceeded");
}
Add the body of the new member function somewhere after the class declaration but before the namespace-closing right curly bracket.
inline void Likelihood::useUnderflowScaling(bool do_scaling) {
_underflow_scaling = do_scaling;
}
In the Likelihood::newInstance function, add BEAGLE_FLAG_SCALING_MANUAL and BEAGLE_FLAG_SCALERS_LOG to the preference flags and tell BeagleLib to create num_internals + 1 scaling buffers (i.e. arrays). Each scaling buffer has one element for each pattern in the data, and the extra buffer stores the cumulative scaling factors. If scaling is done at each internal node, there must be at least as many scaling buffers as there are internal nodes. The items to add or change are shown below.
inline void Likelihood::newInstance(unsigned nstates, int nrates, std::vector<unsigned> & subset_indices) {
unsigned num_subsets = (unsigned)subset_indices.size();
bool is_invar_model = (nrates < 0 ? true : false);
unsigned ngammacat = (unsigned)(is_invar_model ? -nrates : nrates);
unsigned num_patterns = 0;
for (auto s : subset_indices) {
num_patterns += _data->getNumPatternsInSubset(s);
}
unsigned num_internals = calcNumInternalsInFullyResolvedTree();
// add 1 to num_edges so that subroot node will have a tmatrix, root tip's tmatrix is never used
unsigned num_edges = calcNumEdgesInFullyResolvedTree();
unsigned num_nodes = num_edges + 1;
unsigned num_transition_probs = num_nodes*num_subsets;
long requirementFlags = 0;
long preferenceFlags = BEAGLE_FLAG_PRECISION_SINGLE | BEAGLE_FLAG_THREADING_CPP;
if (_underflow_scaling) {
preferenceFlags |= BEAGLE_FLAG_SCALING_MANUAL;
preferenceFlags |= BEAGLE_FLAG_SCALERS_LOG;
}
if (_prefer_gpu)
preferenceFlags |= BEAGLE_FLAG_PROCESSOR_GPU;
else
preferenceFlags |= BEAGLE_FLAG_PROCESSOR_CPU;
BeagleInstanceDetails instance_details;
unsigned npartials = num_internals + _ntaxa;
unsigned nscalers = num_internals; // one scale buffer for every internal node
unsigned nsequences = 0;
if (_ambiguity_equals_missing) {
npartials -= _ntaxa;
nsequences += _ntaxa;
}
int inst = beagleCreateInstance(
_ntaxa, // tips
npartials, // partials
nsequences, // sequences
nstates, // states
num_patterns, // patterns (total across all subsets that use this instance)
num_subsets, // models (one for each distinct eigen decomposition)
num_subsets*num_transition_probs, // transition matrices (one for each node in each subset)
ngammacat, // rate categories
(_underflow_scaling ? nscalers + 1 : 0), // scale buffers (+1 is for the cumulative scaler at index 0)
NULL, // resource restrictions
0, // length of resource list
preferenceFlags, // preferred flags
requirementFlags, // required flags
&instance_details); // pointer for details
if (inst < 0) {
// beagleCreateInstance returns one of the following:
// valid instance (0, 1, 2, ...)
// error code (negative integer)
throw XStrom(boost::str(boost::format("Likelihood init function failed to create BeagleLib instance (BeagleLib error code was %d: %s)") % inst % _beagle_error[inst]));
}
InstanceInfo info;
info.handle = inst;
info.resourcenumber = instance_details.resourceNumber;
info.resourcename = instance_details.resourceName;
info.nstates = nstates;
info.nratecateg = ngammacat;
info.invarmodel = is_invar_model;
info.subsets = subset_indices;
info.npatterns = num_patterns;
info.partial_offset = num_internals;
info.tmatrix_offset = num_nodes;
_instances.push_back(info);
}
Modify the Likelihood::getScalerIndex function to tell BeagleLib which scaling buffer to use when computing the partials for a given internal node. We will reserve the first scaling buffer to hold the cumulative sum of log scalers for each pattern, and, because internal nodes are numbered starting with _ntaxa, we need to subtract _ntaxa from the internal node number and add 1 to get the index of the scaler buffer to use (for example, if _ntaxa is 5, the first internal node, numbered 5, uses scaler buffer 1).
inline unsigned Likelihood::getScalerIndex(Node * nd, InstanceInfo & info) const {
unsigned sindex = BEAGLE_OP_NONE;
if (_underflow_scaling)
sindex = nd->_number - _ntaxa + 1; // +1 to skip the cumulative scaler vector
return sindex;
}
We’ve now provided BeagleLib with the appropriate number of scaling buffers, and, when operations are added to recalculate partials in addOperation, scaling will be done because getScalerIndex will return the appropriate scaling buffer index rather than BEAGLE_OP_NONE (see the sketch below). All that is left is to harvest the scalers at each internal node and accumulate them for the final log-likelihood calculation.
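For reference, here is a minimal sketch of how getScalerIndex feeds into the operations list. It assumes the addOperation structure from the earlier partitioning steps of this tutorial, in which each operation is appended to _operations as a flat sequence of ints; only the first three elements of each operation are shown.
    // 1. destination partial to be calculated
    int partial = getPartialIndex(nd, info);
    _operations[info.handle].push_back(partial);

    // 2. destination scale buffer to write to (BEAGLE_OP_NONE unless _underflow_scaling is true)
    int scaler_index = getScalerIndex(nd, info);
    _operations[info.handle].push_back(scaler_index);

    // 3. destination scale buffer to read from (we only ever write scalers, so always BEAGLE_OP_NONE)
    _operations[info.handle].push_back(BEAGLE_OP_NONE);

    // ... child partials, transition matrix indices, and subset information follow ...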
The large new section in the code below first creates a list (internal_node_scaler_indices) of all scaling buffer indices in use (which is just the scaler index for every internal node).
If the instance is managing just one data subset (the unpartitioned case), beagleResetScaleFactors is used to zero out all elements of the cumulative scale buffer (the first, 0th, scaler buffer, whose index is cumulative_scale_index), and beagleAccumulateScaleFactors is used to sum up the log scalers (using the scaler indices supplied via internal_node_scaler_indices).
If the instance is managing more than one data subset (the partitioned case), beagleResetScaleFactorsByPartition and beagleAccumulateScaleFactorsByPartition perform the same reset and accumulation, once for each subset.
Because we are using just one cumulative scaling buffer for all site patterns regardless of their subset of origin, it is really not necessary to use the ByPartition versions of these functions, so you might save a tiny bit of computational effort by using the simpler versions even in the partitioned case. The ByPartition versions simply limit their activity (in the cumulative scaler buffer) to patterns from subset s, whereas beagleResetScaleFactors and beagleAccumulateScaleFactors affect the entire cumulative scaler buffer.
The other new lines simply specify that the scaler buffer with index 0 is where the cumulative scaling factors are stored when underflow scaling is being done.
inline double Likelihood::calcInstanceLogLikelihood(InstanceInfo & info, Tree::SharedPtr t) {
int code = 0;
unsigned nsubsets = (unsigned)info.subsets.size();
assert(nsubsets > 0);
// Assuming there are as many transition matrices as there are edge lengths
assert(_pmatrix_index[info.handle].size() == _edge_lengths[info.handle].size());
int state_frequency_index = 0;
int category_weights_index = 0;
int cumulative_scale_index = (_underflow_scaling ? 0 : BEAGLE_OP_NONE);
int child_partials_index = getPartialIndex(t->_root, info);
int parent_partials_index = getPartialIndex(t->_preorder[0], info);
int parent_tmatrix_index = getTMatrixIndex(t->_preorder[0], info, 0);
// storage for results of the likelihood calculation
std::vector<double> subset_log_likelihoods(nsubsets, 0.0);
double log_likelihood = 0.0;
if (_underflow_scaling) {
// Create a vector of all scale buffer indices currently in use (one per internal node)
std::vector<int> internal_node_scaler_indices;
for (auto nd : t->_preorder) {
if (nd->_left_child) {
unsigned s = getScalerIndex(nd, info);
internal_node_scaler_indices.push_back(s);
}
}

if (nsubsets == 1) {
code = beagleResetScaleFactors(info.handle, cumulative_scale_index);
if (code != 0)
throw XStrom(boost::str(boost::format("failed to reset scale factors in calcInstanceLogLikelihood. BeagleLib error code was %d (%s)") % code % _beagle_error[code]));

code = beagleAccumulateScaleFactors(
info.handle,
&internal_node_scaler_indices[0],
(int)internal_node_scaler_indices.size(),
cumulative_scale_index);
if (code != 0)
throw XStrom(boost::str(boost::format("failed to accumulate scale factors in calcInstanceLogLikelihood. BeagleLib error code was %d (%s)") % code % _beagle_error[code]));
}
else {
for (unsigned s = 0; s < nsubsets; ++s) {
code = beagleResetScaleFactorsByPartition(info.handle, cumulative_scale_index, s);
if (code != 0)
throw XStrom(boost::str(boost::format("failed to reset scale factors for subset %d in calcInstanceLogLikelihood. BeagleLib error code was %d (%s)") % s % code % _beagle_error[code]));

code = beagleAccumulateScaleFactorsByPartition(
info.handle,
&internal_node_scaler_indices[0],
(int)internal_node_scaler_indices.size(),
cumulative_scale_index,
s);
if (code != 0)
throw XStrom(boost::str(boost::format("failed to accumulate scale factors for subset %d in calcInstanceLogLikelihood. BeagleLib error code was %d (%s)") % s % code % _beagle_error[code]));
}
}
}
if (nsubsets > 1) {
_parent_indices.assign(nsubsets, parent_partials_index);
_child_indices.assign(nsubsets, child_partials_index);
_weights_indices.assign(nsubsets, category_weights_index);
_scaling_indices.resize(nsubsets);
_subset_indices.resize(nsubsets);
_freqs_indices.resize(nsubsets);
_tmatrix_indices.resize(nsubsets);
for (unsigned s = 0; s < nsubsets; s++) {
_scaling_indices[s] = (_underflow_scaling ? 0 : BEAGLE_OP_NONE);
_subset_indices[s] = s;
_freqs_indices[s] = s;
_tmatrix_indices[s] = getTMatrixIndex(t->_preorder[0], info, s); //index_focal_child + s*tmatrix_skip;
}
code = beagleCalculateEdgeLogLikelihoodsByPartition(
info.handle, // instance number
&_parent_indices[0], // indices of parent partialsBuffers
&_child_indices[0], // indices of child partialsBuffers
&_tmatrix_indices[0], // transition probability matrices for this edge
NULL, // first derivative matrices
NULL, // second derivative matrices
&_weights_indices[0], // weights to apply to each partialsBuffer
&_freqs_indices[0], // state frequencies for each partialsBuffer
&_scaling_indices[0], // scaleBuffers containing accumulated factors
&_subset_indices[0], // indices of subsets
nsubsets, // partition subset count
1, // number of distinct eigen decompositions
&subset_log_likelihoods[0], // address of vector of log likelihoods (one for each subset)
&log_likelihood, // destination for resulting log likelihood
NULL, // destination for vector of first derivatives (one for each subset)
NULL, // destination for first derivative
NULL, // destination for vector of second derivatives (one for each subset)
NULL); // destination for second derivative
}
else {
code = beagleCalculateEdgeLogLikelihoods(
info.handle, // instance number
&parent_partials_index, // indices of parent partialsBuffers
&child_partials_index, // indices of child partialsBuffers
&parent_tmatrix_index, // transition probability matrices for this edge
NULL, // first derivative matrices
NULL, // second derivative matrices
&category_weights_index, // weights to apply to each partialsBuffer
&state_frequency_index, // state frequencies for each partialsBuffer
&cumulative_scale_index, // scaleBuffers containing accumulated factors
1, // Number of partialsBuffer
&log_likelihood, // destination for log likelihood
NULL, // destination for first derivative
NULL); // destination for second derivative
}
// ...
Add a new data member to the Strom class named _use_underflow_scaling:
class Strom {
public:
Strom();
~Strom();
void clear();
void processCommandLineOptions(int argc, const char * argv[]);
void run();
private:
bool processAssignmentString(const std::string & which, const std::string & definition);
void handleAssignmentStrings(const boost::program_options::variables_map & vm, std::string label, const std::vector<std::string> & definitions, std::string default_definition);
bool splitAssignmentString(const std::string & definition, std::vector<std::string> & vector_of_subset_names, std::vector<double> & vector_of_values);
double _expected_log_likelihood;
std::string _data_file_name;
std::string _tree_file_name;
Partition::SharedPtr _partition;
Data::SharedPtr _data;
Model::SharedPtr _model;
Likelihood::SharedPtr _likelihood;
TreeSummary::SharedPtr _tree_summary;
bool _use_gpu;
bool _ambig_missing;
bool _use_underflow_scaling;
static std::string _program_name;
static unsigned _major_version;
static unsigned _minor_version;
};
Initialize it to false in the clear function:
inline void Strom::clear() {
_data_file_name = "";
_tree_file_name = "";
_tree_summary = nullptr;
_partition.reset(new Partition());
_use_gpu = true;
_ambig_missing = true;
_model.reset(new Model());
_expected_log_likelihood = 0.0;
_data = nullptr;
_likelihood = nullptr;
_use_underflow_scaling = false;
}
Add a program option to set it in processCommandLineOptions:
inline void Strom::processCommandLineOptions(int argc, const char * argv[]) {
std::vector<std::string> partition_statefreq;
std::vector<std::string> partition_rmatrix;
std::vector<std::string> partition_omega;
std::vector<std::string> partition_ratevar;
std::vector<std::string> partition_pinvar;
std::vector<std::string> partition_ncateg;
std::vector<std::string> partition_subsets;
std::vector<std::string> partition_relrates;
std::vector<std::string> partition_tree;
boost::program_options::variables_map vm;
boost::program_options::options_description desc("Allowed options");
desc.add_options()
("help,h", "produce help message")
("version,v", "show program version")
("datafile,d", boost::program_options::value(&_data_file_name)->required(), "name of a data file in NEXUS format")
("treefile,t", boost::program_options::value(&_tree_file_name)->required(), "name of a tree file in NEXUS format")
("subset", boost::program_options::value(&partition_subsets), "a string defining a partition subset, e.g. 'first:1-1234\3' or 'default[codon:standard]:1-3702'")
("ncateg,c", boost::program_options::value(&partition_ncateg), "number of categories in the discrete Gamma rate heterogeneity model")
("statefreq", boost::program_options::value(&partition_statefreq), "a string defining state frequencies for one or more data subsets, e.g. 'first,second:0.1,0.2,0.3,0.4'")
("omega", boost::program_options::value(&partition_omega), "a string defining the nonsynonymous/synonymous rate ratio omega for one or more data subsets, e.g. 'first,second:0.1'")
("rmatrix", boost::program_options::value(&partition_rmatrix), "a string defining the rmatrix for one or more data subsets, e.g. 'first,second:1,2,1,1,2,1'")
("ratevar", boost::program_options::value(&partition_ratevar), "a string defining the among-site rate variance for one or more data subsets, e.g. 'first,second:2.5'")
("pinvar", boost::program_options::value(&partition_pinvar), "a string defining the proportion of invariable sites for one or more data subsets, e.g. 'first,second:0.2'")
("relrate", boost::program_options::value(&partition_relrates), "a string defining the (unnormalized) relative rates for all data subsets (e.g. 'default:3,1,6').")
("tree", boost::program_options::value(&partition_tree), "the index of the tree in the tree file (first tree has index = 1)")
("expectedLnL", boost::program_options::value(&_expected_log_likelihood)->default_value(0.0), "log likelihood expected")
("gpu", boost::program_options::value(&_use_gpu)->default_value(true), "use GPU if available")
("ambigmissing", boost::program_options::value(&_ambig_missing)->default_value(true), "treat all ambiguities as missing data")
("underflowscaling", boost::program_options::value(&_use_underflow_scaling)->default_value(false), "scale site-likelihoods to prevent underflow (slower but safer)")
;
// ...
Finally, call the function useUnderflowScaling to tell the Likelihood object whether the user wishes to use scaling:
inline void Strom::run() {
std::cout << "Starting..." << std::endl;
std::cout << "Current working directory: " << boost::filesystem::current_path() << std::endl;
try {
std::cout << "\n*** Reading and storing the data in the file " << _data_file_name << std::endl;
_data = Data::SharedPtr(new Data());
_data->setPartition(_partition);
_data->getDataFromFile(_data_file_name);
_model->setSubsetNumPatterns(_data->calcNumPatternsVect());
_model->setSubsetSizes(_partition->calcSubsetSizes());
_model->activate();
// Report information about data partition subsets
unsigned nsubsets = _data->getNumSubsets();
std::cout << "\nNumber of taxa: " << _data->getNumTaxa() << std::endl;
std::cout << "Number of partition subsets: " << nsubsets << std::endl;
for (unsigned subset = 0; subset < nsubsets; subset++) {
DataType dt = _partition->getDataTypeForSubset(subset);
std::cout << " Subset " << (subset+1) << " (" << _data->getSubsetName(subset) << ")" << std::endl;
std::cout << " data type: " << dt.getDataTypeAsString() << std::endl;
std::cout << " sites: " << _data->calcSeqLenInSubset(subset) << std::endl;
std::cout << " patterns: " << _data->getNumPatternsInSubset(subset) << std::endl;
std::cout << " ambiguity: " << (_ambig_missing || dt.isCodon() ? "treated as missing data (faster)" : "handled appropriately (slower)") << std::endl;
}
std::cout << "\n*** Resources available to BeagleLib " << _likelihood->beagleLibVersion() << ":\n";
std::cout << _likelihood->availableResources() << std::endl;
std::cout << "\n*** Creating the likelihood calculator" << std::endl;
_likelihood = Likelihood::SharedPtr(new Likelihood());
_likelihood->setPreferGPU(_use_gpu);
_likelihood->setAmbiguityEqualsMissing(_ambig_missing);
_likelihood->setData(_data);
<span style="color:#0000ff"><strong>_likelihood->useUnderflowScaling(_use_underflow_scaling);</strong></span>
std::cout << "\n*** Model description" << std::endl;
std::cout << _model->describeModel() << std::endl;
_likelihood->setModel(_model);
_likelihood->initBeagleLib();
std::cout << "\n*** Reading and storing the first tree in the file " << _tree_file_name << std::endl;
_tree_summary = TreeSummary::SharedPtr(new TreeSummary());
_tree_summary->readTreefile(_tree_file_name, 0);
Tree::SharedPtr tree = _tree_summary->getTree(0);
if (tree->numLeaves() != _data->getNumTaxa())
throw XStrom(boost::format("Number of taxa in tree (%d) does not equal the number of taxa in the data matrix (%d)") % tree->numLeaves() % _data->getNumTaxa());
std::cout << "\n*** Calculating the likelihood of the tree" << std::endl;
double lnL = _likelihood->calcLogLikelihood(tree);
std::cout << boost::str(boost::format("log likelihood = %.5f") % lnL) << std::endl;
if (_expected_log_likelihood != 0.0)
std::cout << boost::str(boost::format(" (expecting %.3f)") % _expected_log_likelihood) << std::endl;
}
catch (XStrom & x) {
std::cerr << "Strom encountered a problem:\n " << x.what() << std::endl;
}
std::cout << "\nFinished!" << std::endl;
}
If you now run your program using the strom.conf file below (which sets underflowscaling to yes to turn on scaling), you should find that it computes the log-likelihood correctly.
datafile = rbcl738.nex
treefile = rbcl738nj.tre
rmatrix = default: 0.08394222, 0.34116704, 0.03603322, 0.15737940, 0.30297095, 0.07850717
statefreq = default: 0.309769, 0.163380, 0.121023, 0.405828
ratevar = default:1.933185251
ncateg = default:4
underflowscaling = yes
expectedLnL = -144730.75
Adding rescaling imposes an additional computational burden on our likelihood calculation. You can lessen the burden by not rescaling at every internal node. All that is needed to avoid rescaling for a particular internal node is to modify the operation associated with that internal node, specifying BEAGLE_OP_NONE instead of the scaler index derived from the node number for the 2nd element of the operation (the destination scale-write buffer). Skimping too much, however, runs the risk of underflow. For now, we will rescale at every internal node (if the user specifies that scaling is to be done) just to be safe.
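If you do want to experiment with skipping, here is a hypothetical variant of getScalerIndex that rescales only at every other internal node; the even/odd test is purely illustrative and not part of the tutorial code. Note that the loop building internal_node_scaler_indices in calcInstanceLogLikelihood would then also need to skip entries equal to BEAGLE_OP_NONE.
    inline unsigned Likelihood::getScalerIndex(Node * nd, InstanceInfo & info) const {
        unsigned sindex = BEAGLE_OP_NONE;
        // Hypothetical: rescale only at internal nodes whose zero-based internal
        // index is even; skipped nodes get BEAGLE_OP_NONE as their scale-write buffer
        if (_underflow_scaling && (nd->_number - _ntaxa) % 2 == 0)
            sindex = nd->_number - _ntaxa + 1; // +1 skips the cumulative scaler at index 0
        return sindex;
    }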