"""
TXTree: A Visual Tool for PubMed Literature Exploration by Text Mining
Copyright (C) 2025 Diogo de Jesus Soares Machado, Roberto Tadeu Raittz

This file is part of TXTree.

TXTree is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

TXTree is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with TXTree. If not, see <https://www.gnu.org/licenses/>.
"""

from ete3 import Tree
from scipy.cluster.hierarchy import linkage

def mat_to_ete_tree(
        data, labels, method='single', metric='euclidean', reroot=None
    ):
    """
    Builds an ETE3 tree from a data matrix via hierarchical clustering.

    The data matrix is clustered with `scipy.cluster.hierarchy.linkage`, the
    resulting linkage matrix is serialized to a Newick string by
    `linkage_to_newick`, and that string is parsed into an ETE3 Tree object.
    Optionally, the tree is rerooted using a named node as the outgroup.

    Parameters:
        - data (np.ndarray): A data matrix (n_samples x n_features) to be
          clustered.
        - labels (list): Labels for the rows of `data`; they become the leaf
          names of the tree.
        - method (str, optional): Linkage method, passed unchanged to
          `scipy.cluster.hierarchy.linkage` (e.g., 'ward', 'complete',
          'single', 'average', etc.). Defaults to 'single'.
        - metric (str, optional): Distance metric, passed unchanged to
          `scipy.cluster.hierarchy.linkage` (e.g., 'euclidean', 'cosine',
          'cityblock', etc.). Defaults to 'euclidean'.
        - reroot (str, optional): Name of the node to use as outgroup when
          rerooting. If None, the tree is returned as constructed. If
          several nodes carry this name, the first one found is used.
          Defaults to None.

    Returns:
        - ete3.Tree: The tree built from the clustering, rerooted if
          requested.

    Raises:
        - ValueError: If `reroot` is given but no node with that name exists
          in the tree.
    """

    # Hierarchical clustering of the rows of the data matrix
    linkage_matrix = linkage(data, method=method, metric=metric)

    # Serialize the clustering as Newick and parse it with ETE3
    tree = Tree(linkage_to_newick(linkage_matrix, labels))

    # Nothing more to do when no rerooting was requested
    if reroot is None:
        return tree

    # Look up the requested outgroup by name
    matches = tree.search_nodes(name=reroot)
    if not matches:
        raise ValueError(
            f"Node with name '{reroot}' not found in the tree."
        )

    tree.set_outgroup(matches[0])
    return tree

def linkage_to_newick(Z, labels):
    """
    Converts a linkage matrix to a Newick string.

    This function processes a linkage matrix generated by hierarchical
    clustering and converts it into a Newick string, which is a standard
    representation of phylogenetic trees.

    Each row of the linkage matrix merges two existing nodes into a new
    one. Both children of a merge receive the merge distance (third column
    of the row) as their branch length, formatted with six decimal places.
    Labels are inserted verbatim, without quoting or escaping.

    Parameters:
        - Z (ndarray): The linkage matrix generated by
          scipy.cluster.hierarchy.linkage. Only the first three columns of
          each row are read.
        - labels (list of str): List of labels for the leaves of the
          dendrogram. Must contain exactly one more entry than Z has rows.

    Returns:
        - str: The Newick string representation of the tree, terminated by
          a semicolon.

    Raises:
        - ValueError: If the number of labels does not equal the number of
          rows in Z plus one.
    """

    n = len(labels)
    n_merges = len(Z)

    # Merged clusters are numbered n, n + 1, ... so the numbering only
    # agrees with the indices stored in Z when there is exactly one more
    # leaf than merges. A mismatch would otherwise drop leaves silently or
    # fail with an opaque KeyError.
    if n != n_merges + 1:
        raise ValueError(
            f"Expected {n_merges + 1} labels for a linkage matrix with "
            f"{n_merges} rows, got {n}."
        )

    # Initialize nodes with leaf labels, keyed by leaf index
    nodes = dict(enumerate(labels))

    # Process linkage matrix to build the tree in Newick format
    for i, row in enumerate(Z):
        left, right = int(row[0]), int(row[1])
        distance = row[2]

        # Merged children are removed so only unmerged subtrees remain
        left_node = nodes.pop(left)
        right_node = nodes.pop(right)

        # Combine left and right nodes into a new node
        nodes[n + i] = (
            f"({left_node}:{distance:.6f},{right_node}:{distance:.6f})"
        )

    # The node with the highest index is the root of the tree
    return nodes[max(nodes)] + ";"