The OutputManager
class depends on (1) the Data::createTaxaBlock
and Data::createTranslateStatement
functions to create a taxa block and a translate statement, respectively, in its openTreeFile
function; the Model::paramNamesAsString
and Model::paramValuesAsString
functions to obtain current model parameter names in its openParamFile
function and values in its outputParameters
function; and the TreeManip::makeNewick
function to provide the newick tree description that it saves to the tree file in its outputTree
function.
The last of these (TreeManip::makeNewick
) already exists, and the following two sections add the other helper functions to the Data
and Model
classes.
Add the bold, blue lines to the Data
class declaration in the file data.hpp
.
class Data {
public:
typedef std::vector<std::string> taxon_names_t;
typedef unsigned long long state_t;
typedef std::vector<state_t> pattern_vect_t;
typedef std::vector<state_t> monomorphic_vect_t;
typedef std::vector<int> partition_key_t;
typedef std::map<pattern_vect_t,unsigned> pattern_map_t;
typedef std::vector<pattern_vect_t> data_matrix_t;
typedef std::vector<pattern_map_t> pattern_map_vect_t;
typedef std::vector<double> pattern_counts_t;
typedef std::vector<unsigned> subset_end_t;
typedef std::vector<unsigned> npatterns_vect_t;
typedef std::pair<unsigned, unsigned> begin_end_pair_t;
typedef std::shared_ptr<Data> SharedPtr;
Data();
~Data();
Partition::SharedPtr getPartition();
void setPartition(Partition::SharedPtr partition);
void getDataFromFile(const std::string filename);
unsigned getNumSubsets() const;
std::string getSubsetName(unsigned subset) const;
unsigned getNumTaxa() const;
const taxon_names_t & getTaxonNames() const;
unsigned getNumPatterns() const;
npatterns_vect_t calcNumPatternsVect() const;
unsigned getNumPatternsInSubset(unsigned subset) const;
unsigned getNumStatesForSubset(unsigned subset) const;
unsigned calcSeqLen() const;
unsigned calcSeqLenInSubset(unsigned subset) const;
const data_matrix_t & getDataMatrix() const;
begin_end_pair_t getSubsetBeginEnd(unsigned subset) const;
const pattern_counts_t & getPatternCounts() const;
const monomorphic_vect_t & getMonomorphic() const;
const partition_key_t & getPartitionKey() const;
<span style="color:#0000ff"><strong>std::string createTaxaBlock() const;</strong></span>
<span style="color:#0000ff"><strong>std::string createTranslateStatement() const;</strong></span>
void clear();
private:
unsigned storeTaxonNames(NxsTaxaBlock * taxaBlock, unsigned taxa_block_index);
unsigned storeData(unsigned ntax, unsigned nchar, NxsCharactersBlock * charBlock, NxsCharactersBlock::DataTypesEnum datatype);
unsigned buildSubsetSpecificMaps(unsigned ntaxa, unsigned seqlen, unsigned nsubsets);
void updatePatternMap(Data::pattern_vect_t & pattern, unsigned subset);
void compressPatterns();
Partition::SharedPtr _partition;
pattern_counts_t _pattern_counts;
monomorphic_vect_t _monomorphic;
partition_key_t _partition_key;
pattern_map_vect_t _pattern_map_vect;
taxon_names_t _taxon_names;
data_matrix_t _data_matrix;
subset_end_t _subset_end;
};
Now add the 2 function bodies below the class declaration but before the namespace-closing right curly bracket.
inline std::string Data::createTaxaBlock() const {
std::string s = "";
s += "begin taxa;\n";
s += boost::str(boost::format(" dimensions ntax=%d;\n") % _taxon_names.size());
s += " taxlabels\n";
for (auto nm : _taxon_names) {
std::string taxon_name = std::regex_replace(nm, std::regex(" "), "_");
s += " " + taxon_name + "\n";
}
s += " ;\n";
s += "end;\n";
return s;
}
inline std::string Data::createTranslateStatement() const {
std::string s = "";
s += " translate\n";
unsigned t = 1;
for (auto nm : _taxon_names) {
std::string taxon_name = std::regex_replace(nm, std::regex(" "), "_");
s += boost::str(boost::format(" %d %s%s\n") % t % taxon_name % (t < _taxon_names.size() ? "," : ""));
t++;
}
s += " ;\n";
return s;
}
Add the bold, blue lines to the Model
class declaration in the file model.hpp.
class Model {
friend class Likelihood;
public:
typedef std::vector<ASRV::SharedPtr> asrv_vect_t;
typedef std::vector<QMatrix::SharedPtr> qmat_vect_t;
typedef std::vector<unsigned> subset_sizes_t;
typedef std::vector<DataType> subset_datatype_t;
typedef std::vector<double> subset_relrate_vect_t;
typedef std::vector<QMatrix::SharedPtr> state_freq_params_t;
typedef std::vector<QMatrix::SharedPtr> exchangeability_params_t;
typedef std::vector<QMatrix::SharedPtr> omega_params_t;
typedef std::vector<ASRV::SharedPtr> ratevar_params_t;
typedef std::vector<ASRV::SharedPtr> pinvar_params_t;
typedef boost::shared_ptr<Model> SharedPtr;
Model();
~Model();
void activate();
void inactivate();
std::string describeModel();
void setSubsetDataTypes(const subset_datatype_t & datatype_vect);
void setSubsetRateVar(ASRV::ratevar_ptr_t ratevar, unsigned subset, bool fixed);
void setSubsetPinvar(ASRV::pinvar_ptr_t pinvar, unsigned subset, bool fixed);
void setSubsetExchangeabilities(QMatrix::freq_xchg_ptr_t exchangeabilities, unsigned subset, bool fixed);
void setSubsetStateFreqs(QMatrix::freq_xchg_ptr_t state_frequencies, unsigned subset, bool fixed);
void setSubsetOmega(QMatrix::omega_ptr_t omega, unsigned subset, bool fixed);
void setSubsetRelRates(subset_relrate_vect_t & relrates, bool fixed);
subset_relrate_vect_t & getSubsetRelRates();
bool isFixedSubsetRelRates() const;
double calcNormalizingConstantForSubsetRelRates() const;
void setTreeIndex(unsigned i, bool fixed);
unsigned getTreeIndex() const;
bool isFixedTree() const;
unsigned getNumSubsets() const;
unsigned getNumSites() const;
unsigned getSubsetNumSites(unsigned subset) const;
const QMatrix & getQMatrix(unsigned subset) const;
const ASRV & getASRV(unsigned subset) const;
void setSubsetIsInvarModel(bool is_invar, unsigned subset);
bool getSubsetIsInvarModel(unsigned subset) const;
void setSubsetSizes(const subset_sizes_t nsites_vect);
subset_sizes_t & getSubsetSizes();
void setSubsetNumPatterns(const subset_sizes_t npatterns_vect);
unsigned getSubsetNumPatterns(unsigned subset) const;
void setSubsetNumCateg(unsigned ncateg, unsigned subset);
unsigned getSubsetNumCateg(unsigned subset) const;
state_freq_params_t & getStateFreqParams();
exchangeability_params_t & getExchangeabilityParams();
omega_params_t & getOmegaParams();
ratevar_params_t & getRateVarParams();
pinvar_params_t & getPinvarParams();
int setBeagleEigenDecomposition(int beagle_instance, unsigned subset, unsigned instance_subset);
int setBeagleStateFrequencies(int beagle_instance, unsigned subset, unsigned instance_subset);
int setBeagleAmongSiteRateVariationRates(int beagle_instance, unsigned subset, unsigned instance_subset);
int setBeagleAmongSiteRateVariationProbs(int beagle_instance, unsigned subset, unsigned instance_subset);
<span style="color:#0000ff"><strong>std::string paramNamesAsString(std::string sep) const;</strong></span>
<span style="color:#0000ff"><strong>std::string paramValuesAsString(std::string sep) const;</strong></span>
private:
void clear();
unsigned _num_subsets;
unsigned _num_sites;
subset_sizes_t _subset_sizes;
subset_sizes_t _subset_npatterns;
subset_datatype_t _subset_datatypes;
qmat_vect_t _qmatrix;
asrv_vect_t _asrv;
bool _tree_index;
bool _tree_fixed;
bool _subset_relrates_fixed;
subset_relrate_vect_t _subset_relrates;
state_freq_params_t _state_freq_params;
exchangeability_params_t _exchangeability_params;
omega_params_t _omega_params;
ratevar_params_t _ratevar_params;
pinvar_params_t _pinvar_params;
};
Add the function body for paramNamesAsString
below the class declaration but before the namespace-closing right curly bracket. This creates a row of parameter names (headers) that serve as a key to the values in the columns below. The index of the subset to which each parameter belongs is incorporated into the name, with subset 0 being the first subset (even if there is only one subset).
inline std::string Model::paramNamesAsString(std::string sep) const {
unsigned k;
std::string s = "";
if (_num_subsets > 1) {
for (k = 0; k < _num_subsets; k++) {
s += boost::str(boost::format("m-%d%s") % k % sep);
}
}
for (k = 0; k < _num_subsets; k++) {
if (_subset_datatypes[k].isNucleotide()) {
s += boost::str(boost::format("rAC-%d%srAG-%d%srAT-%d%srCG-%d%srCT-%d%srGT-%d%s") % k % sep % k % sep % k % sep % k % sep % k % sep % k % sep);
s += boost::str(boost::format("piA-%d%spiC-%d%spiG-%d%spiT-%d%s") % k % sep % k % sep % k % sep % k % sep);
}
else if (_subset_datatypes[k].isCodon()) {
s += boost::str(boost::format("omega-%d%s") % k % sep);
for (std::string codon : _subset_datatypes[0].getGeneticCode()->_codons)
s += boost::str(boost::format("pi%s-%d%s") % codon % k % sep);
}
if (_asrv[k]->getIsInvarModel()) {
s += boost::str(boost::format("pinvar-%d%s") % k % sep);
}
if (_asrv[k]->getNumCateg() > 1) {
s += boost::str(boost::format("ratevar-%d%s") % k % sep);
}
}
return s;
}
Add the function body for paramValuesAsString
below the class declaration but before the namespace-closing right curly bracket. This function is nearly identical to paramNamesAsString
except that it outputs current parameter values rather than parameter names.
inline std::string Model::paramValuesAsString(std::string sep) const {
unsigned k;
std::string s = "";
if (_num_subsets > 1) {
for (k = 0; k < _num_subsets; k++) {
s += boost::str(boost::format("%.5f%s") % _subset_relrates[k] % sep);
}
}
for (k = 0; k < _num_subsets; k++) {
if (_subset_datatypes[k].isNucleotide()) {
QMatrix::freq_xchg_t x = *_qmatrix[k]->getExchangeabilitiesSharedPtr();
s += boost::str(boost::format("%.5f%s%.5f%s%.5f%s%.5f%s%.5f%s%.5f%s") % x[0] % sep % x[1] % sep % x[2] % sep % x[3] % sep % x[4] % sep % x[5] % sep);
QMatrix::freq_xchg_t f = *_qmatrix[k]->getStateFreqsSharedPtr();
s += boost::str(boost::format("%.5f%s%.5f%s%.5f%s%.5f%s") % f[0] % sep % f[1] % sep % f[2] % sep % f[3] % sep);
}
else if (_subset_datatypes[k].isCodon()) {
s += boost::str(boost::format("%.5f%s") % _qmatrix[k]->getOmega() % sep);
QMatrix::freq_xchg_t f = *_qmatrix[k]->getStateFreqsSharedPtr();
for (unsigned m = 0; m < _subset_datatypes[0].getNumStates(); m++)
s += boost::str(boost::format("%.5f%s") % f[m] % sep);
}
if (_asrv[k]->getIsInvarModel()) {
s += boost::str(boost::format("%.5f%s") % _asrv[k]->getPinvar() % sep);
}
if (_asrv[k]->getNumCateg() > 1) {
s += boost::str(boost::format("%.5f%s") % _asrv[k]->getRateVar() % sep);
}
}
return s;
}