All that is needed now is to create an OutputManager
object and call its functions at the appropriate places. The parts that need to be added (or modified) in the Strom
class declaration in the file strom.hpp are marked with bold, blue text below.
#pragma once
#include <iostream>
#include "data.hpp"
#include "likelihood.hpp"
#include "tree_summary.hpp"
#include "partition.hpp"
#include "lot.hpp"
#include "chain.hpp"
<span style="color:#0000ff"><strong>#include "output_manager.hpp"</strong></span>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
namespace strom {
class Strom {
public:
Strom();
~Strom();
void clear();
void processCommandLineOptions(int argc, const char * argv[]);
void run();
private:
bool processAssignmentString(const std::string & which, const std::string & definition);
void handleAssignmentStrings(const boost::program_options::variables_map & vm, std::string label, const std::vector<std::string> & definitions, std::string default_definition);
bool splitAssignmentString(const std::string & definition, std::vector<std::string> & vector_of_subset_names, std::vector<double> & vector_of_values);
<span style="color:#0000ff"><strong>void sample(unsigned iter, Chain & chain);</strong></span>
double _expected_log_likelihood;
std::string _data_file_name;
std::string _tree_file_name;
Partition::SharedPtr _partition;
Data::SharedPtr _data;
Model::SharedPtr _model;
Likelihood::SharedPtr _likelihood;
TreeSummary::SharedPtr _tree_summary;
Lot::SharedPtr _lot;
unsigned _random_seed;
unsigned _num_iter;
unsigned _print_freq;
unsigned _sample_freq;
bool _use_gpu;
bool _ambig_missing;
bool _use_underflow_scaling;
static std::string _program_name;
static unsigned _major_version;
static unsigned _minor_version;
<span style="color:#0000ff"><strong>OutputManager::SharedPtr _output_manager;</strong></span>
};
Add the indicated line to the clear
function body to initialize the _output_manager
data member.
inline void Strom::clear() {
_data_file_name = "";
_tree_file_name = "";
_tree_summary = nullptr;
_partition.reset(new Partition());
_use_gpu = true;
_ambig_missing = true;
_model.reset(new Model());
_expected_log_likelihood = 0.0;
_data = nullptr;
_likelihood = nullptr;
_use_underflow_scaling = false;
_lot = nullptr;
_random_seed = 1;
_num_iter = 1000;
_print_freq = 1;
_sample_freq = 1;
<span style="color:#0000ff"><strong>_output_manager = nullptr;</strong></span>
}
You’ll note that I’ve added a private member function named sample
. This function is not essential, but serves to make the run
function more tidy by gathering all code lines related to saving a tree and parameter sample together so that they can be called by adding a single line to the run function whereever sampling needs to occur. Here is the body of the sample
function.
inline void Strom::sample(unsigned iteration, Chain & chain) {
if (chain.getHeatingPower() < 1.0)
return;
bool time_to_sample = (bool)(iteration % _sample_freq == 0);
bool time_to_report = (bool)(iteration % _print_freq == 0);
if (time_to_sample || time_to_report) {
double logLike = chain.getLogLikelihood();
double logPrior = chain.calcLogJointPrior();
double TL = chain.getTreeManip()->calcTreeLength();
if (time_to_report) {
if (logPrior == Updater::getLogZero())
_output_manager->outputConsole(boost::str(boost::format("%12d %12.5f %12s %12.5f") % iteration % logLike % "-infinity" % TL));
else
_output_manager->outputConsole(boost::str(boost::format("%12d %12.5f %12.5f %12.5f") % iteration % logLike % logPrior % TL));
}
if (time_to_sample) {
_output_manager->outputTree(iteration, chain.getTreeManip());
_output_manager->outputParameters(iteration, logLike, logPrior, TL, chain.getModel());
}
}
}
Changes to the run
function involve creating an OutputManager
object and opening/closing tree and sample files. You will also note that some of the blue, bold text reflects sections of code that have been commented out because their functionality has been replaced by the new OutputManager
object. The two calls to the sample
function cause output to be sent to the console and/or sample and tree files at the very start (iteration 0) and at each samplefreq
(or printfreq
) iteration thereafter.
inline void Strom::run() {
std::cout << "Starting..." << std::endl;
std::cout << "Current working directory: " << boost::filesystem::current_path() << std::endl;
try {
std::cout << "\n*** Reading and storing the data in the file " << _data_file_name << std::endl;
_data = Data::SharedPtr(new Data());
_data->setPartition(_partition);
_data->getDataFromFile(_data_file_name);
_model->setSubsetNumPatterns(_data->calcNumPatternsVect());
_model->setSubsetSizes(_partition->calcSubsetSizes());
_model->activate();
// Report information about data partition subsets
unsigned nsubsets = _data->getNumSubsets();
std::cout << "\nNumber of taxa: " << _data->getNumTaxa() << std::endl;
std::cout << "Number of partition subsets: " << nsubsets << std::endl;
for (unsigned subset = 0; subset < nsubsets; subset++) {
DataType dt = _partition->getDataTypeForSubset(subset);
std::cout << " Subset " << (subset+1) << " (" << _data->getSubsetName(subset) << ")" << std::endl;
std::cout << " data type: " << dt.getDataTypeAsString() << std::endl;
std::cout << " sites: " << _data->calcSeqLenInSubset(subset) << std::endl;
std::cout << " patterns: " << _data->getNumPatternsInSubset(subset) << std::endl;
std::cout << " ambiguity: " << (_ambig_missing || dt.isCodon() ? "treated as missing data (faster)" : "handled appropriately (slower)") << std::endl;
}
std::cout << "\n*** Resources available to BeagleLib " << _likelihood->beagleLibVersion() << ":\n";
std::cout << _likelihood->availableResources() << std::endl;
std::cout << "\n*** Creating the likelihood calculator" << std::endl;
_likelihood = Likelihood::SharedPtr(new Likelihood());
_likelihood->setPreferGPU(_use_gpu);
_likelihood->setAmbiguityEqualsMissing(_ambig_missing);
_likelihood->setData(_data);
_likelihood->useUnderflowScaling(_use_underflow_scaling);
std::cout << "\n*** Model description" << std::endl;
std::cout << _model->describeModel() << std::endl;
_likelihood->setModel(_model);
_likelihood->initBeagleLib();
std::cout << "\n*** Reading and storing the first tree in the file " << _tree_file_name << std::endl;
_tree_summary = TreeSummary::SharedPtr(new TreeSummary());
_tree_summary->readTreefile(_tree_file_name, 0);
Tree::SharedPtr tree = _tree_summary->getTree(0);
std::string newick = _tree_summary->getNewick(0);
if (tree->numLeaves() != _data->getNumTaxa())
throw XStrom(boost::format("Number of taxa in tree (%d) does not equal the number of taxa in the data matrix (%d)") % tree->numLeaves() % _data->getNumTaxa());
std::cout << "\n*** Calculating the likelihood of the tree" << std::endl;
TreeManip tm(tree);
tm.selectAllPartials();
tm.selectAllTMatrices();
double lnL = _likelihood->calcLogLikelihood(tree);
std::cout << boost::str(boost::format("log likelihood = %.5f") % lnL) << std::endl;
if (_expected_log_likelihood != 0.0)
std::cout << boost::str(boost::format(" (expecting %.3f)") % _expected_log_likelihood) << std::endl;
// Create a Lot object that generates (pseudo)random numbers
_lot = Lot::SharedPtr(new Lot);
_lot->setSeed(_random_seed);
<span style="color:#0000ff"><strong>// Create an output manager and open output files</strong></span>
<span style="color:#0000ff"><strong>_output_manager.reset(new OutputManager);</strong></span>
// Create a Chain object and take _num_iter steps
Chain chain;
unsigned num_free_parameters = chain.createUpdaters(_model, _lot, _likelihood);
if (num_free_parameters > 0) {
<span style="color:#0000ff"><strong>_output_manager->outputConsole(boost::str(boost::format("\n%12s %12s %12s %12s") % "iteration" % "logLike" % "logPrior" % "TL"));</strong></span>
<span style="color:#0000ff"><strong>_output_manager->openTreeFile("trees.tre", _data);</strong></span>
<span style="color:#0000ff"><strong>_output_manager->openParameterFile("params.txt", _model);</strong></span>
chain.setTreeFromNewick(newick);
chain.start();
<span style="color:#0000ff"><strong>// // Output column headers and first line of output showing starting state (iteration 0)</strong></span>
<span style="color:#0000ff"><strong>//std::cout << boost::str(boost::format("\n%12s %12s %12s %12s %12s %12s\n")</strong></span>
<span style="color:#0000ff"><strong>// % "iteration"</strong></span>
<span style="color:#0000ff"><strong>// % "lnLike"</strong></span>
<span style="color:#0000ff"><strong>// % "lnPrior"</strong></span>
<span style="color:#0000ff"><strong>// % "ratevar"</strong></span>
<span style="color:#0000ff"><strong>// % "accept"</strong></span>
<span style="color:#0000ff"><strong>// % "samples");</strong></span>
<span style="color:#0000ff"><strong>//GammaRateVarUpdater::SharedPtr ratevar_updater = std::dynamic_pointer_cast<GammaRateVarUpdater> (chain.findUpdaterByName("Gamma Rate Variance"));</strong></span>
<span style="color:#0000ff"><strong>//std::cout << boost::str(boost::format("%12d %12.5f %12.5f %12.5f %12.1f %12d\n")</strong></span>
<span style="color:#0000ff"><strong>// % 0</strong></span>
<span style="color:#0000ff"><strong>// % chain.getLogLikelihood()</strong></span>
<span style="color:#0000ff"><strong>// % chain.calcLogJointPrior()</strong></span>
<span style="color:#0000ff"><strong>// % ratevar_updater->getCurrentPoint()</strong></span>
<span style="color:#0000ff"><strong>// % 0</strong></span>
<span style="color:#0000ff"><strong>// % 0);</strong></span>
<span style="color:#0000ff"><strong>sample(0, chain);</strong></span>
for (unsigned iteration = 1; iteration <= _num_iter; ++iteration) {
chain.nextStep(iteration);
<span style="color:#0000ff"><strong>sample(iteration, chain);</strong></span>
<span style="color:#0000ff"><strong>//if (iteration % _sample_freq == 0) {</strong></span>
<span style="color:#0000ff"><strong>// GammaRateVarUpdater::SharedPtr ratevar_updater = std::dynamic_pointer_cast<GammaRateVarUpdater> (chain.findUpdaterByName("Gamma Rate Variance"));</strong></span>
<span style="color:#0000ff"><strong>// double log_prior = chain.calcLogJointPrior();</strong></span>
<span style="color:#0000ff"><strong>// if (log_prior == Updater::getLogZero())</strong></span>
<span style="color:#0000ff"><strong>// std::cout << boost::str(boost::format("%12d %12.5f %12s %12.5f %12.1f %12d\n")</strong></span>
<span style="color:#0000ff"><strong>// % iteration</strong></span>
<span style="color:#0000ff"><strong>// % chain.getLogLikelihood()</strong></span>
<span style="color:#0000ff"><strong>// % "-infinity"</strong></span>
<span style="color:#0000ff"><strong>// % ratevar_updater->getCurrentPoint()</strong></span>
<span style="color:#0000ff"><strong>// % ratevar_updater->getAcceptPct()</strong></span>
<span style="color:#0000ff"><strong>// % ratevar_updater->getNumUpdates());</strong></span>
<span style="color:#0000ff"><strong>// else</strong></span>
<span style="color:#0000ff"><strong>// std::cout << boost::str(boost::format("%12d %12.5f %12.5f %12.5f %12.1f %12d\n")</strong></span>
<span style="color:#0000ff"><strong>// % iteration</strong></span>
<span style="color:#0000ff"><strong>// % chain.getLogLikelihood()</strong></span>
<span style="color:#0000ff"><strong>// % log_prior</strong></span>
<span style="color:#0000ff"><strong>// % ratevar_updater->getCurrentPoint()</strong></span>
<span style="color:#0000ff"><strong>// % ratevar_updater->getAcceptPct()</strong></span>
<span style="color:#0000ff"><strong>// % ratevar_updater->getNumUpdates());</strong></span>
<span style="color:#0000ff"><strong>//}</strong></span>
}
chain.stop();
<span style="color:#0000ff"><strong>// Close output files</strong></span>
<span style="color:#0000ff"><strong>_output_manager->closeTreeFile();</strong></span>
<span style="color:#0000ff"><strong>_output_manager->closeParameterFile();</strong></span>
<span style="color:#0000ff"><strong>// Summarize updater efficiency</strong></span>
<span style="color:#0000ff"><strong>_output_manager->outputConsole("\nChain summary:");</strong></span>
<span style="color:#0000ff"><strong>std::vector<std::string> names = chain.getUpdaterNames();</strong></span>
<span style="color:#0000ff"><strong>std::vector<double> lambdas = chain.getLambdas();</strong></span>
<span style="color:#0000ff"><strong>std::vector<double> accepts = chain.getAcceptPercentages();</strong></span>
<span style="color:#0000ff"><strong>std::vector<unsigned> nupdates = chain.getNumUpdates();</strong></span>
<span style="color:#0000ff"><strong>unsigned n = (unsigned)names.size();</strong></span>
<span style="color:#0000ff"><strong>_output_manager->outputConsole(boost::str(boost::format("%30s %15s %15s %15s") % "Updater" % "Tuning Param." % "Accept %" % "No. Updates"));</strong></span>
<span style="color:#0000ff"><strong>for (unsigned i = 0; i < n; ++i) {</strong></span>
<span style="color:#0000ff"><strong>_output_manager->outputConsole(boost::str(boost::format("%30s %15.3f %15.1f %15d") % names[i] % lambdas[i] % accepts[i] % nupdates[i]));</strong></span>
<span style="color:#0000ff"><strong>}</strong></span>
}
else {
<span style="color:#0000ff"><strong>_output_manager->outputConsole("MCMC skipped because there are no free parameters in the model");</strong></span>
}
}
catch (XStrom & x) {
std::cerr << "Strom encountered a problem:\n " << x.what() << std::endl;
}
std::cout << "\nFinished!" << std::endl;
}