19.6 Modifying the Node, Tree, and TreeManip classes

(Linux version)

< 19.5 | 19.6 | 19.7 >

We need to add a data member to Tree and some functions to the Node and TreeManip class to facilitate dealing with trees containing polytomies.

Modifying Tree

Add the highlighted lines below to the file tree.hpp.

#pragma once    

<span style="color:#0000ff"><strong>#include &lt;stack&gt;</strong></span>
#include &lt;memory&gt;
#include &lt;iostream&gt;
#include "node.hpp"

namespace strom {

    class TreeManip;
    class Likelihood;
    class Updater;
    class TreeUpdater;
    <span style="color:#0000ff"><strong>class PolytomyUpdater;</strong></span>

    class Tree {

            friend class TreeManip;
            friend class Likelihood;
            friend class Updater;
            friend class TreeUpdater;
            <span style="color:#0000ff"><strong>friend class PolytomyUpdater;</strong></span>

        public:

                                        Tree();
                                        ~Tree();

            bool                        isRooted() const;
            unsigned                    numLeaves() const;
            unsigned                    numInternals() const;
            unsigned                    numNodes() const;
        
        private:

            void                        clear();

            bool                        _is_rooted;
            Node *                      _root;
            unsigned                    _nleaves;
            unsigned                    _ninternals;
            Node::PtrVector             _preorder;
            Node::PtrVector             _levelorder;
            Node::Vector                _nodes;
            <span style="color:#0000ff"><strong>std::vector&lt;Node *&gt;         _unused_nodes;</strong></span>

        public:

            typedef std::shared_ptr&lt;Tree&gt; SharedPtr;
    };
    

Modifying Node

Declare (and define) a new function, clearPointers, in the Node class declaration at the top of the file node.hpp.

#pragma once    

#include &lt;string&gt;
#include &lt;vector&gt;
#include  &lt;iostream&gt;
#include "split.hpp"

namespace strom {

    class Tree;
    class TreeManip;
    class Likelihood;
    class Updater;

    class Node {
        friend class Tree;
        friend class TreeManip;
        friend class Likelihood;
        friend class Updater;

        public:
                                        Node();
                                        ~Node();

                    Node *              getParent()                 {return _parent;}
                    Node *              getLeftChild()              {return _left_child;}
                    Node *              getRightSib()               {return _right_sib;}
                    int                 getNumber()                 {return _number;}
                    std::string         getName()                   {return _name;}
                    Split               getSplit()                  {return _split;}
        
                    bool                isSelected()                {return _flags & Flag::Selected;}
                    void                select()                    {_flags |= Flag::Selected;}
                    void                deselect()                  {_flags &= ~Flag::Selected;}

                    bool                isSelPartial()              {return _flags & Flag::SelPartial;}
                    void                selectPartial()             {_flags |= Flag::SelPartial;}
                    void                deselectPartial()           {_flags &= ~Flag::SelPartial;}

                    bool                isSelTMatrix()              {return _flags & Flag::SelTMatrix;}
                    void                selectTMatrix()             {_flags |= Flag::SelTMatrix;}
                    void                deselectTMatrix()           {_flags &= ~Flag::SelTMatrix;}

                    bool                isAltPartial()              {return _flags & Flag::AltPartial;}
                    void                setAltPartial()             {_flags |= Flag::AltPartial;}
                    void                clearAltPartial()           {_flags &= ~Flag::AltPartial;}

                    bool                isAltTMatrix()              {return _flags & Flag::AltTMatrix;}
                    void                setAltTMatrix()             {_flags |= Flag::AltTMatrix;}
                    void                clearAltTMatrix()           {_flags &= ~Flag::AltTMatrix;}
                    
                    void                flipTMatrix()               {isAltTMatrix() ? clearAltTMatrix() : setAltTMatrix();}
                    void                flipPartial()               {isAltPartial() ? clearAltPartial() : setAltPartial();}

                    double              getEdgeLength()             {return _edge_length;}
                    void                setEdgeLength(double v);

                    <span style="color:#0000ff"><strong>void                clearPointers()             {_left_child = _right_sib = _parent = 0;}</strong></span>
                                        
            static const double _smallest_edge_length;

            typedef std::vector&lt;Node&gt;    Vector;
            typedef std::vector&lt;Node *&gt;  PtrVector;
        
        private:
        
            enum Flag {
                Selected   = (1 &lt;&lt; 0),
                SelPartial = (1 &lt;&lt; 1),
                SelTMatrix = (1 &lt;&lt; 2),
                AltPartial = (1 &lt;&lt; 3),
                AltTMatrix = (1 &lt;&lt; 4)
            };

            void                clear();

            Node *              _left_child;
            Node *              _right_sib;
            Node *              _parent;
            int                 _number;
            std::string         _name;
            double              _edge_length;
            Split               _split;
            int                 _flags;
    };
    

Replace the initialization of the 3 pointer data members _left_child, _right_sib, and _parent with a call to the new clearPointers function.

    inline void Node::clear() { 
        _flags = 0;
        <span style="color:#0000ff"><strong>clearPointers();</strong></span>
        <span style="color:#0000ff"><strong>//_left_child = 0;</strong></span>
        <span style="color:#0000ff"><strong>//_right_sib = 0;</strong></span>
        <span style="color:#0000ff"><strong>//_parent = 0;</strong></span>
        _number = -1;
        _name = "";
        _edge_length = _smallest_edge_length;
    }   

Modifying TreeManip

Add the highlighted lines below to the TreeManip class declaration.

#pragma once    

#include &lt;cassert&gt;
#include &lt;memory&gt;
#include &lt;stack&gt;
#include &lt;queue&gt;
#include &lt;set&gt;
#include &lt;regex&gt;
#include &lt;boost/range/adaptor/reversed.hpp&gt;
#include &lt;boost/format.hpp&gt;
#include "tree.hpp"
#include "lot.hpp"
#include "xstrom.hpp"

namespace strom {

    class TreeManip {
        public:
                                        TreeManip();
                                        TreeManip(Tree::SharedPtr t);
                                        ~TreeManip();

            void                        setTree(Tree::SharedPtr t);
            Tree::SharedPtr             getTree();

            double                      calcTreeLength() const;
            <span style="color:#0000ff"><strong>unsigned                    calcResolutionClass() const;</strong></span>
            unsigned                    countEdges() const;
            <span style="color:#0000ff"><strong>unsigned                    countInternals() const;</strong></span>
            void                        scaleAllEdgeLengths(double scaler);
            
            void                        createTestTree();
            std::string                 makeNewick(unsigned precision, bool use_names = false) const;

            void                        buildFromNewick(const std::string newick, bool rooted, bool allow_polytomies);
            void                        storeSplits(std::set&lt;Split&gt; & splitset);
            void                        rerootAtNodeNumber(int node_number);
        
            Node *                      randomInternalEdge(Lot::SharedPtr lot);
            Node *                      randomChild(Lot::SharedPtr lot, Node * x, Node * avoid, bool parent_included);
            void                        LargetSimonSwap(Node * a, Node * b);
            
            <span style="color:#0000ff"><strong>bool                        isPolytomy(Node * nd) const;</strong></span>
            <span style="color:#0000ff"><strong>void                        nniNodeSwap(Node * a, Node * b);</strong></span>
            <span style="color:#0000ff"><strong>unsigned                    countChildren(Node * nd) const;</strong></span>
            <span style="color:#0000ff"><strong>Node *                      findLeftSib(Node * nd);</strong></span>
            <span style="color:#0000ff"><strong>Node *                      findRightmostChild(Node * nd);</strong></span>
            <span style="color:#0000ff"><strong>Node *                      findLastPreorderInClade(Node * start);</strong></span>
            <span style="color:#0000ff"><strong>void                        insertSubtreeOnLeft(Node * s, Node * u);</strong></span>
            <span style="color:#0000ff"><strong>void                        insertSubtreeOnRight(Node * s, Node * u);</strong></span>
            <span style="color:#0000ff"><strong>void                        detachSubtree(Node * s);</strong></span>
            <span style="color:#0000ff"><strong>void                        rectifyNumInternals(int incr);</strong></span>
            <span style="color:#0000ff"><strong>void                        refreshNavigationPointers();</strong></span>
            <span style="color:#0000ff"><strong>Node *                      getUnusedNode(Node * sought = 0);</strong></span>
            <span style="color:#0000ff"><strong>void                        putUnusedNode(Node * nd);</strong></span>
            
            void                        selectAll();
            void                        deselectAll();
            void                        selectAllPartials();
            void                        deselectAllPartials();
            void                        selectAllTMatrices();
            void                        deselectAllTMatrices();
            void                        selectPartialsHereToRoot(Node * a);
            void                        flipPartialsAndTMatrices();

            void                        clear();
            
        private:

            Node *                      findNextPreorder(Node * nd);
            void                        refreshPreorder();
            void                        refreshLevelorder();
            void                        renumberInternals();
            void                        rerootAtNode(Node * prospective_root);
            void                        extractNodeNumberFromName(Node * nd, std::set&lt;unsigned&gt; & used);
            void                        extractEdgeLen(Node * nd, std::string edge_length_string);
            unsigned                    countNewickLeaves(const std::string newick);
            void                        stripOutNexusComments(std::string & newick);
            bool                        canHaveSibling(Node * nd, bool rooted, bool allow_polytomies);

            Tree::SharedPtr             _tree;

        public:

            typedef std::shared_ptr&lt; TreeManip &gt; SharedPtr;
    };


Add function isPolytomy

This function returns true if the supplied nd points to a Node object representing a polytomy (i.e. nd has more than 2 children).

    inline bool TreeManip::isPolytomy(Node * nd) const {   
        Node * lchild = nd-&gt;_left_child;
        assert(lchild);    // should only call this function for internal nodes
        
        Node * rchild = lchild-&gt;_right_sib;
        if (rchild && rchild-&gt;_right_sib)
            return true;
        return false;
    }   

The calcResolutionClass function

Returns the resolution class of the managed tree (the resolution class is just the number of internal nodes).

    inline unsigned TreeManip::calcResolutionClass() const {    
        return _tree-&gt;_ninternals;
    }   

The countChildren function

Counts and returns the number of child nodes (left child and all its siblings) of the node supplied.

    inline unsigned TreeManip::countChildren(Node * nd) const { 
        assert(nd);
        unsigned nchildren = 0;
        Node * child = nd-&gt;getLeftChild();
        while (child) {
            nchildren++;
            child = child-&gt;getRightSib();
        }
        return nchildren;
    }   

The countInternals function

Counts and returns the number of internal nodes in the managed tree.

    inline unsigned TreeManip::countInternals() const { 
        unsigned m = 0;
        for (auto nd : _tree-&gt;_preorder) {
            if (nd-&gt;_left_child)
                m++;
        }
        return m;
    }   

Modifying the randomInternalEdge function

Add a special case to this function to handle the star tree situation in which there are no internal edges in the tree:

    inline Node * TreeManip::randomInternalEdge(Lot::SharedPtr lot) {   
        // Unrooted case:                        Rooted case:
        //
        // 2     3     4     5                   1     2     3     4
        //  \   /     /     /                     \   /     /     /
        //   \ /     /     /                       \ /     /     /
        //    8     /     /                         7     /     /
        //     \   /     /                           \   /     /
        //      \ /     /                             \ /     /
        //       7     /                               6     /
        //        \   /                                 \   /
        //         \ /                                   \ /
        //          6   nleaves = 5                       5    nleaves = 4
        //          |   preorder length = 7               |    preorder length = 7
        //          |   num_internal_edges = 7 - 5 = 2    |    num_internal_edges = 7 - 4 - 1 = 2
        //          1   choose node 7 or node 8          root  choose node 6 or node 7
        //
        // _preorder = [6, 7, 8, 2, 3, 4, 5]     _preorder = [5, 6, 7, 1, 2, 3, 4]
        //
        // Note: _preorder is actually a vector of T *, but is shown here as a
        // vector of integers solely to illustrate the algorithm below
        
        int num_internal_edges = (unsigned)_tree-&gt;_preorder.size() - _tree-&gt;_nleaves - (_tree-&gt;_is_rooted ? 1 : 0);
        <span style="color:#0000ff"><strong>if (num_internal_edges == 0) {</strong></span>
            <span style="color:#0000ff"><strong>// Star tree: return hub node, which is the first node in the preorder sequence</strong></span>
            <span style="color:#0000ff"><strong>return _tree-&gt;_preorder[0];</strong></span>
        <span style="color:#0000ff"><strong>}</strong></span>

        // Add one to skip first node in _preorder vector, which is an internal node whose edge
        // is either a terminal edge (if tree is unrooted) or invalid (if tree is rooted)
        double uniform_deviate = lot-&gt;uniform();
        unsigned index_of_chosen = 1 + (unsigned)std::floor(uniform_deviate*num_internal_edges);

        unsigned internal_nodes_visited = 0;
        Node * chosen_node = 0;
        for (auto nd : _tree-&gt;_preorder) {
            if (nd-&gt;_left_child) {
                if (internal_nodes_visited == index_of_chosen) {
                    chosen_node = nd;
                    break;
                }
                else
                    ++internal_nodes_visited;
            }
        }
        assert(chosen_node);
        return chosen_node;
    }   

Modifying the renumberInternals function

In the previous version of this function, the internal nodes that were not used because of the presence of polytomies were simply numbered. Now that there is the possibility that a tree can make use of these initially unused nodes, we will modify this function to store these currently unused nodes in the _unused_nodes vector of the managed Tree object.

    inline void TreeManip::renumberInternals() {    
        assert(_tree-&gt;_preorder.size() &gt; 0);
        
        // Renumber internal nodes in postorder sequence
        unsigned curr_internal = _tree-&gt;_nleaves;
        for (auto nd : boost::adaptors::reverse(_tree-&gt;_preorder)) {
            if (nd-&gt;_left_child) {
                // nd is an internal node
                nd-&gt;_number = curr_internal++;
            }
        }
        
        // Root node is not included in _tree-&gt;_preorder, so if the root node
        // is an internal node we need to number it here
        if (_tree-&gt;_is_rooted)
            _tree-&gt;_root-&gt;_number = curr_internal++;
            
        _tree-&gt;_ninternals = curr_internal - _tree-&gt;_nleaves;
        
        <span style="color:#0000ff"><strong>_tree-&gt;_unused_nodes.clear();</strong></span>
        <span style="color:#0000ff"><strong>for (; curr_internal &lt; (unsigned)_tree-&gt;_nodes.size(); curr_internal++) {</strong></span>
            <span style="color:#0000ff"><strong>Node * curr = &_tree-&gt;_nodes[curr_internal];</strong></span>
            <span style="color:#0000ff"><strong>putUnusedNode(curr);</strong></span>
            <span style="color:#0000ff"><strong>assert(curr-&gt;_number == -1);</strong></span>
            <span style="color:#0000ff"><strong>curr-&gt;_number = curr_internal;</strong></span>
        <span style="color:#0000ff"><strong>}</strong></span>
        
    }   

Adding the findLeftSib function

This function determines whether there is a sibling to the left of the supplied Node. If so, a pointer to that left sib is returned; if there is no left sib, NULL is returned.

    inline Node * TreeManip::findLeftSib(Node * nd) {   
        assert(nd);
        assert(nd-&gt;_parent);
        Node * child = nd-&gt;_parent-&gt;_left_child;
        while (child && child-&gt;_right_sib != nd)
            child = child-&gt;_right_sib;
        return child;
    }   

Adding the findRightmostChild function

Finds the rightmost child of the supplied Node. If there are no children, returns NULL.

    inline Node * TreeManip::findRightmostChild(Node * nd) {    
        assert(nd);
        Node * child = nd-&gt;getLeftChild();
        while (child-&gt;getRightSib())
            child = child-&gt;getRightSib();
        return child;
    }   

Adding the findLastPreorderInClade function

Walks through the entire clade rooted at the supplied node, returning the last descendant in the preorder sequence before leaving the clade.

    inline Node * TreeManip::findLastPreorderInClade(Node * start) {    
        assert(start);
        Node * curr = start;
        Node * rchild = findRightmostChild(curr);
        while (rchild) {
            curr = rchild;
            rchild = findRightmostChild(curr);
        }
        return curr;
    }   

Adding the insertSubtreeOnLeft function

Makes supplied Node s the left child of supplied Node u.

    inline void TreeManip::insertSubtreeOnLeft(Node * s, Node * u) {    
        assert(u);
        assert(s);
        s-&gt;_right_sib  = u-&gt;_left_child;
        s-&gt;_parent     = u;
        u-&gt;_left_child = s;
    }   

Adding the insertSubtreeOnRight function

If supplied Node u has children, this function makes supplied Node s the right sib of the rightmost child of u. If u has no children currently, s is made the only child of u.

    inline void TreeManip::insertSubtreeOnRight(Node * s, Node * u) {   
        assert(u);
        assert(s);

        s-&gt;_right_sib = 0;
        s-&gt;_parent    = u;
        if (u-&gt;_left_child) {
            Node * u_rchild = findRightmostChild(u);
            u_rchild-&gt;_right_sib = s;
        }
        else
            u-&gt;_left_child = s;
    }   

Adding the detachSubtree function

This function detaches the supplied Node s from the managed tree. This function is normally used just before calling the insertSubtreeOnLeft or insertSubtreeOnRight function to reinsert s.

    inline void TreeManip::detachSubtree(Node * s) {    
        assert(s);
        assert(s-&gt;_parent);
        
        // Save pointers to relevant nodes
        Node * s_leftsib  = findLeftSib(s);
        Node * s_rightsib = s-&gt;_right_sib;
        Node * s_parent   = s-&gt;_parent;

        // Completely detach s and seal up the wound
        s-&gt;_parent = 0;
        s-&gt;_right_sib = 0;
        if (s_leftsib)
            s_leftsib-&gt;_right_sib = s_rightsib;
        else
            s_parent-&gt;_left_child = s_rightsib;
    }   

Adding the rectifyNumInternals function

Tree objects have an _ninternals data member that holds the number of internal nodes in the tree. Polytomy analyses perform modifications to trees that change the number of internal nodes, and this function is used to keep this Tree data member accurate by adding incr to the value currently stored in the managed tree’s _ninternals data member. Note that incr is an int and can be negative.

    inline void TreeManip::rectifyNumInternals(int incr) {  
        assert(_tree-&gt;_nodes.size() == _tree-&gt;_unused_nodes.size() + _tree-&gt;_nleaves + _tree-&gt;_ninternals + incr);
        _tree-&gt;_ninternals += incr;
    }   

Adding the refreshNavigationPointers function

This function calls refreshPreorder and refreshLevelorder to ensure that the managed tree’s _preorder and _levelorder vectors accurately reflect the structure of the tree. This function should be called if, for example, a polytomy is created or expanded by a delete-edge move or is broken apart by an add-edge move.

    inline void TreeManip::refreshNavigationPointers() {    
        refreshPreorder();
        refreshLevelorder();
    }   

Adding the getUnusedNode and putUnusedNode functions

Polytomies in trees mean that not all elements of the tree’s _nodes vector are used. The Tree class’s _unused_nodes stack keeps a list of nodes that are currently not being used. These unused nodes are tapped by the Likelihood::defineOperations function to help in computing the likelihood on polytomous trees. the getUnusedNode removes a node from the top of the stack and returns a pointer to the removed Node object. The putUnusedNode function takes a Node pointer and pushes it onto the _unused_nodes stack.

    inline Node * TreeManip::getUnusedNode(Node * sought) {  
        assert(!_tree-&gt;_unused_nodes.empty());
        Node * nd = 0;
        if (sought) {
            unsigned i = 0;
            for (Node * und : _tree-&gt;_unused_nodes) {
                if (und == sought) {
                    nd = und;
                    _tree-&gt;_unused_nodes.erase(_tree-&gt;_unused_nodes.begin()+i);
                    break;
                }
                ++i;
            }
            assert(nd);
        }
        else {
            nd = _tree-&gt;_unused_nodes.back();
            _tree-&gt;_unused_nodes.pop_back();
        }
        nd-&gt;clearPointers();
        return nd;
    }   
    
    inline void TreeManip::putUnusedNode(Node * nd) {   
        nd-&gt;clearPointers();
        _tree-&gt;_unused_nodes.push_back(nd);
    }   

< 19.5 | 19.6 | 19.7 >