We need to add a data member to Tree
and some functions to the Node
and TreeManip
class to facilitate dealing with trees containing polytomies.
Add the highlighted lines below to the file tree.hpp.
#pragma once
<span style="color:#0000ff"><strong>#include <stack></strong></span>
#include <memory>
#include <iostream>
#include "node.hpp"
namespace strom {
class TreeManip;
class Likelihood;
class Updater;
class TreeUpdater;
<span style="color:#0000ff"><strong>class PolytomyUpdater;</strong></span>
class Tree {
friend class TreeManip;
friend class Likelihood;
friend class Updater;
friend class TreeUpdater;
<span style="color:#0000ff"><strong>friend class PolytomyUpdater;</strong></span>
public:
Tree();
~Tree();
bool isRooted() const;
unsigned numLeaves() const;
unsigned numInternals() const;
unsigned numNodes() const;
private:
void clear();
bool _is_rooted;
Node * _root;
unsigned _nleaves;
unsigned _ninternals;
Node::PtrVector _preorder;
Node::PtrVector _levelorder;
Node::Vector _nodes;
<span style="color:#0000ff"><strong>std::vector<Node *> _unused_nodes;</strong></span>
public:
typedef std::shared_ptr<Tree> SharedPtr;
};
Declare (and define) a new function, clearPointers
, in the Node
class declaration at the top of the file node.hpp.
#pragma once
#include <string>
#include <vector>
#include <iostream>
#include "split.hpp"
namespace strom {
class Tree;
class TreeManip;
class Likelihood;
class Updater;
class Node {
friend class Tree;
friend class TreeManip;
friend class Likelihood;
friend class Updater;
public:
Node();
~Node();
Node * getParent() {return _parent;}
Node * getLeftChild() {return _left_child;}
Node * getRightSib() {return _right_sib;}
int getNumber() {return _number;}
std::string getName() {return _name;}
Split getSplit() {return _split;}
bool isSelected() {return _flags & Flag::Selected;}
void select() {_flags |= Flag::Selected;}
void deselect() {_flags &= ~Flag::Selected;}
bool isSelPartial() {return _flags & Flag::SelPartial;}
void selectPartial() {_flags |= Flag::SelPartial;}
void deselectPartial() {_flags &= ~Flag::SelPartial;}
bool isSelTMatrix() {return _flags & Flag::SelTMatrix;}
void selectTMatrix() {_flags |= Flag::SelTMatrix;}
void deselectTMatrix() {_flags &= ~Flag::SelTMatrix;}
bool isAltPartial() {return _flags & Flag::AltPartial;}
void setAltPartial() {_flags |= Flag::AltPartial;}
void clearAltPartial() {_flags &= ~Flag::AltPartial;}
bool isAltTMatrix() {return _flags & Flag::AltTMatrix;}
void setAltTMatrix() {_flags |= Flag::AltTMatrix;}
void clearAltTMatrix() {_flags &= ~Flag::AltTMatrix;}
void flipTMatrix() {isAltTMatrix() ? clearAltTMatrix() : setAltTMatrix();}
void flipPartial() {isAltPartial() ? clearAltPartial() : setAltPartial();}
double getEdgeLength() {return _edge_length;}
void setEdgeLength(double v);
<span style="color:#0000ff"><strong>void clearPointers() {_left_child = _right_sib = _parent = 0;}</strong></span>
static const double _smallest_edge_length;
typedef std::vector<Node> Vector;
typedef std::vector<Node *> PtrVector;
private:
enum Flag {
Selected = (1 << 0),
SelPartial = (1 << 1),
SelTMatrix = (1 << 2),
AltPartial = (1 << 3),
AltTMatrix = (1 << 4)
};
void clear();
Node * _left_child;
Node * _right_sib;
Node * _parent;
int _number;
std::string _name;
double _edge_length;
Split _split;
int _flags;
};
Replace the initialization of the 3 pointer data members _left_child
, _right_sib
, and _parent
with a call to the new clearPointers
function.
inline void Node::clear() {
_flags = 0;
<span style="color:#0000ff"><strong>clearPointers();</strong></span>
<span style="color:#0000ff"><strong>//_left_child = 0;</strong></span>
<span style="color:#0000ff"><strong>//_right_sib = 0;</strong></span>
<span style="color:#0000ff"><strong>//_parent = 0;</strong></span>
_number = -1;
_name = "";
_edge_length = _smallest_edge_length;
}
Add the highlighted lines below to the TreeManip
class declaration.
#pragma once
#include <cassert>
#include <memory>
#include <stack>
#include <queue>
#include <set>
#include <regex>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/format.hpp>
#include "tree.hpp"
#include "lot.hpp"
#include "xstrom.hpp"
namespace strom {
class TreeManip {
public:
TreeManip();
TreeManip(Tree::SharedPtr t);
~TreeManip();
void setTree(Tree::SharedPtr t);
Tree::SharedPtr getTree();
double calcTreeLength() const;
<span style="color:#0000ff"><strong>unsigned calcResolutionClass() const;</strong></span>
unsigned countEdges() const;
<span style="color:#0000ff"><strong>unsigned countInternals() const;</strong></span>
void scaleAllEdgeLengths(double scaler);
void createTestTree();
std::string makeNewick(unsigned precision, bool use_names = false) const;
void buildFromNewick(const std::string newick, bool rooted, bool allow_polytomies);
void storeSplits(std::set<Split> & splitset);
void rerootAtNodeNumber(int node_number);
Node * randomInternalEdge(Lot::SharedPtr lot);
Node * randomChild(Lot::SharedPtr lot, Node * x, Node * avoid, bool parent_included);
void LargetSimonSwap(Node * a, Node * b);
<span style="color:#0000ff"><strong>bool isPolytomy(Node * nd) const;</strong></span>
<span style="color:#0000ff"><strong>void nniNodeSwap(Node * a, Node * b);</strong></span>
<span style="color:#0000ff"><strong>unsigned countChildren(Node * nd) const;</strong></span>
<span style="color:#0000ff"><strong>Node * findLeftSib(Node * nd);</strong></span>
<span style="color:#0000ff"><strong>Node * findRightmostChild(Node * nd);</strong></span>
<span style="color:#0000ff"><strong>Node * findLastPreorderInClade(Node * start);</strong></span>
<span style="color:#0000ff"><strong>void insertSubtreeOnLeft(Node * s, Node * u);</strong></span>
<span style="color:#0000ff"><strong>void insertSubtreeOnRight(Node * s, Node * u);</strong></span>
<span style="color:#0000ff"><strong>void detachSubtree(Node * s);</strong></span>
<span style="color:#0000ff"><strong>void rectifyNumInternals(int incr);</strong></span>
<span style="color:#0000ff"><strong>void refreshNavigationPointers();</strong></span>
<span style="color:#0000ff"><strong>Node * getUnusedNode(Node * sought = 0);</strong></span>
<span style="color:#0000ff"><strong>void putUnusedNode(Node * nd);</strong></span>
void selectAll();
void deselectAll();
void selectAllPartials();
void deselectAllPartials();
void selectAllTMatrices();
void deselectAllTMatrices();
void selectPartialsHereToRoot(Node * a);
void flipPartialsAndTMatrices();
void clear();
private:
Node * findNextPreorder(Node * nd);
void refreshPreorder();
void refreshLevelorder();
void renumberInternals();
void rerootAtNode(Node * prospective_root);
void extractNodeNumberFromName(Node * nd, std::set<unsigned> & used);
void extractEdgeLen(Node * nd, std::string edge_length_string);
unsigned countNewickLeaves(const std::string newick);
void stripOutNexusComments(std::string & newick);
bool canHaveSibling(Node * nd, bool rooted, bool allow_polytomies);
Tree::SharedPtr _tree;
public:
typedef std::shared_ptr< TreeManip > SharedPtr;
};
This function returns true if the supplied nd
points to a Node
object representing a polytomy (i.e. nd
has more than 2 children).
inline bool TreeManip::isPolytomy(Node * nd) const {
Node * lchild = nd->_left_child;
assert(lchild); // should only call this function for internal nodes
Node * rchild = lchild->_right_sib;
if (rchild && rchild->_right_sib)
return true;
return false;
}
Returns the resolution class of the managed tree (the resolution class is just the number of internal nodes).
inline unsigned TreeManip::calcResolutionClass() const {
return _tree->_ninternals;
}
Counts and returns the number of child nodes (left child and all its siblings) of the node supplied.
inline unsigned TreeManip::countChildren(Node * nd) const {
assert(nd);
unsigned nchildren = 0;
Node * child = nd->getLeftChild();
while (child) {
nchildren++;
child = child->getRightSib();
}
return nchildren;
}
Counts and returns the number of internal nodes in the managed tree.
inline unsigned TreeManip::countInternals() const {
unsigned m = 0;
for (auto nd : _tree->_preorder) {
if (nd->_left_child)
m++;
}
return m;
}
Add a special case to this function to handle the star tree situation in which there are no internal edges in the tree:
inline Node * TreeManip::randomInternalEdge(Lot::SharedPtr lot) {
// Unrooted case: Rooted case:
//
// 2 3 4 5 1 2 3 4
// \ / / / \ / / /
// \ / / / \ / / /
// 8 / / 7 / /
// \ / / \ / /
// \ / / \ / /
// 7 / 6 /
// \ / \ /
// \ / \ /
// 6 nleaves = 5 5 nleaves = 4
// | preorder length = 7 | preorder length = 7
// | num_internal_edges = 7 - 5 = 2 | num_internal_edges = 7 - 4 - 1 = 2
// 1 choose node 7 or node 8 root choose node 6 or node 7
//
// _preorder = [6, 7, 8, 2, 3, 4, 5] _preorder = [5, 6, 7, 1, 2, 3, 4]
//
// Note: _preorder is actually a vector of T *, but is shown here as a
// vector of integers solely to illustrate the algorithm below
int num_internal_edges = (unsigned)_tree->_preorder.size() - _tree->_nleaves - (_tree->_is_rooted ? 1 : 0);
<span style="color:#0000ff"><strong>if (num_internal_edges == 0) {</strong></span>
<span style="color:#0000ff"><strong>// Star tree: return hub node, which is the first node in the preorder sequence</strong></span>
<span style="color:#0000ff"><strong>return _tree->_preorder[0];</strong></span>
<span style="color:#0000ff"><strong>}</strong></span>
// Add one to skip first node in _preorder vector, which is an internal node whose edge
// is either a terminal edge (if tree is unrooted) or invalid (if tree is rooted)
double uniform_deviate = lot->uniform();
unsigned index_of_chosen = 1 + (unsigned)std::floor(uniform_deviate*num_internal_edges);
unsigned internal_nodes_visited = 0;
Node * chosen_node = 0;
for (auto nd : _tree->_preorder) {
if (nd->_left_child) {
if (internal_nodes_visited == index_of_chosen) {
chosen_node = nd;
break;
}
else
++internal_nodes_visited;
}
}
assert(chosen_node);
return chosen_node;
}
In the previous version of this function, the internal nodes that were not used because of the presence of polytomies were simply numbered. Now that there is the possibility that a tree can make use of these initially unused nodes, we will modify this function to store these currently unused nodes in the _unused_nodes
vector of the managed Tree object.
inline void TreeManip::renumberInternals() {
assert(_tree->_preorder.size() > 0);
// Renumber internal nodes in postorder sequence
unsigned curr_internal = _tree->_nleaves;
for (auto nd : boost::adaptors::reverse(_tree->_preorder)) {
if (nd->_left_child) {
// nd is an internal node
nd->_number = curr_internal++;
}
}
// Root node is not included in _tree->_preorder, so if the root node
// is an internal node we need to number it here
if (_tree->_is_rooted)
_tree->_root->_number = curr_internal++;
_tree->_ninternals = curr_internal - _tree->_nleaves;
<span style="color:#0000ff"><strong>_tree->_unused_nodes.clear();</strong></span>
<span style="color:#0000ff"><strong>for (; curr_internal < (unsigned)_tree->_nodes.size(); curr_internal++) {</strong></span>
<span style="color:#0000ff"><strong>Node * curr = &_tree->_nodes[curr_internal];</strong></span>
<span style="color:#0000ff"><strong>putUnusedNode(curr);</strong></span>
<span style="color:#0000ff"><strong>assert(curr->_number == -1);</strong></span>
<span style="color:#0000ff"><strong>curr->_number = curr_internal;</strong></span>
<span style="color:#0000ff"><strong>}</strong></span>
}
This function determines whether there is a sibling to the left of the supplied Node. If so, a pointer to that left sib is returned; if there is no left sib, NULL is returned.
inline Node * TreeManip::findLeftSib(Node * nd) {
assert(nd);
assert(nd->_parent);
Node * child = nd->_parent->_left_child;
while (child && child->_right_sib != nd)
child = child->_right_sib;
return child;
}
Finds the rightmost child of the supplied Node. If there are no children, returns NULL.
inline Node * TreeManip::findRightmostChild(Node * nd) {
assert(nd);
Node * child = nd->getLeftChild();
while (child->getRightSib())
child = child->getRightSib();
return child;
}
Walks through the entire clade rooted at the supplied node, returning the last descendant in the preorder sequence before leaving the clade.
inline Node * TreeManip::findLastPreorderInClade(Node * start) {
assert(start);
Node * curr = start;
Node * rchild = findRightmostChild(curr);
while (rchild) {
curr = rchild;
rchild = findRightmostChild(curr);
}
return curr;
}
Makes supplied Node s
the left child of supplied Node u
.
inline void TreeManip::insertSubtreeOnLeft(Node * s, Node * u) {
assert(u);
assert(s);
s->_right_sib = u->_left_child;
s->_parent = u;
u->_left_child = s;
}
If supplied Node u
has children, this function makes supplied Node s
the right sib of the rightmost child of u
. If u
has no children currently, s
is made the only child of u
.
inline void TreeManip::insertSubtreeOnRight(Node * s, Node * u) {
assert(u);
assert(s);
s->_right_sib = 0;
s->_parent = u;
if (u->_left_child) {
Node * u_rchild = findRightmostChild(u);
u_rchild->_right_sib = s;
}
else
u->_left_child = s;
}
This function detaches the supplied Node s
from the managed tree. This function is normally used just before calling the insertSubtreeOnLeft
or insertSubtreeOnRight
function to reinsert s
.
inline void TreeManip::detachSubtree(Node * s) {
assert(s);
assert(s->_parent);
// Save pointers to relevant nodes
Node * s_leftsib = findLeftSib(s);
Node * s_rightsib = s->_right_sib;
Node * s_parent = s->_parent;
// Completely detach s and seal up the wound
s->_parent = 0;
s->_right_sib = 0;
if (s_leftsib)
s_leftsib->_right_sib = s_rightsib;
else
s_parent->_left_child = s_rightsib;
}
Tree objects have an _ninternals
data member that holds the number of internal nodes in the tree. Polytomy analyses perform modifications to trees that change the number of internal nodes, and this function is used to keep this Tree data member accurate by adding incr to the value currently stored in the managed tree’s _ninternals
data member. Note that incr
is an int and can be negative.
inline void TreeManip::rectifyNumInternals(int incr) {
assert(_tree->_nodes.size() == _tree->_unused_nodes.size() + _tree->_nleaves + _tree->_ninternals + incr);
_tree->_ninternals += incr;
}
This function calls refreshPreorder
and refreshLevelorder
to ensure that the managed tree’s _preorder and _levelorder vectors accurately reflect the structure of the tree. This function should be called if, for example, a polytomy is created or expanded by a delete-edge move or is broken apart by an add-edge move.
inline void TreeManip::refreshNavigationPointers() {
refreshPreorder();
refreshLevelorder();
}
Polytomies in trees mean that not all elements of the tree’s _nodes
vector are used. The Tree class’s _unused_nodes
stack keeps a list of nodes that are currently not being used. These unused nodes are tapped by the Likelihood::defineOperations
function to help in computing the likelihood on polytomous trees. the getUnusedNode
removes a node from the top of the stack and returns a pointer to the removed Node object. The putUnusedNode
function takes a Node
pointer and pushes it onto the _unused_nodes
stack.
inline Node * TreeManip::getUnusedNode(Node * sought) {
assert(!_tree->_unused_nodes.empty());
Node * nd = 0;
if (sought) {
unsigned i = 0;
for (Node * und : _tree->_unused_nodes) {
if (und == sought) {
nd = und;
_tree->_unused_nodes.erase(_tree->_unused_nodes.begin()+i);
break;
}
++i;
}
assert(nd);
}
else {
nd = _tree->_unused_nodes.back();
_tree->_unused_nodes.pop_back();
}
nd->clearPointers();
return nd;
}
inline void TreeManip::putUnusedNode(Node * nd) {
nd->clearPointers();
_tree->_unused_nodes.push_back(nd);
}