# Source code for thema.multiverse.universe.stars.gudhiStar

# File: multiverse/universe/stars/gudhiStar.py
# Last Update: 11-19-25
# Updated by: JW


import networkx as nx
from gudhi.cover_complex import MapperComplex


from ..star import Star
from ..utils.starHelpers import (
    convert_keys_to_alphabet,
    get_clusterer,
    mapper_unclustered_items,
    mapper_pseudo_laplacian,
)
from ..utils.starGraph import starGraph



# [docs]
def initialize():
    """Expose the star implementation provided by this module.

    Returns
    -------
    type
        The ``gudhiStar`` class itself (not an instance of it).
    """
    star_class = gudhiStar
    return star_class




# [docs]
class gudhiStar(Star):
    """
    GUDHI Star Class
    ----------
    - inherits from Star

    Generates a graph representation of projection using gudhi.

    See: https://gudhi.inria.fr/python/latest/cover_complex_sklearn_isk_ref.html

    Members
    ------
    data: pd.DataFrame
        a pandas dataframe of raw data
    clean: pd.DataFrame
        a pandas dataframe of complete, scaled, and encoded data
    projection: np.ndarray
        a numpy array containing projection coordinates
    N: int
        subsampling iterations forwarded to the mapper
    beta: float
        exponent parameter forwarded to the mapper
    C: float
        constant parameter forwarded to the mapper
    clusterer: object
        The clusterer returned by ``get_clusterer`` for the [name, kwargs]
        list passed to the constructor.
    mapper: gudhi.cover_complex.MapperComplex
        a mapper object
    complex: dict
        ``None`` until ``fit`` runs; afterwards a dict whose "nodes" entry
        holds a copy of ``nodes``.
    nodes: dict
        ``None`` until ``fit`` runs; afterwards the node membership dict
        re-keyed by ``convert_keys_to_alphabet``.
    starGraph: thema.multiverse.universe.starGraph class
        An expanded framework for analyzing networkx graphs

    Functions
    --------
    get_data_path() -> str
        returns path to raw data
    get_clean_path() -> str
        returns path to Moon object containing clean data
    get_projection_path()-> str
        returns path to Comet object containing projection data
    fit() -> None
        Computes a complex and corresponding starGraph
    get_pseudoLaplacian() -> matrix
        returns the pseudo laplacian of the fitted complex
    get_unclustered_items() -> list
        returns list of unclustered items from HDBSCAN
    save() -> None
        Saves object as a .pkl file.

    """

    def __init__(
        self,
        data_path: str,
        clean_path: str,
        projection_path: str,
        clusterer: list,
        N: int = 100,
        beta: float = 0.0,
        C: float = 10.0,
    ):
        """
        Constructs an instance of gudhiStar

        Parameters
        ---------
        data_path : str
            A path to the raw data file.
        clean_path : str
            A path to a configured Moon object file.
        projection_path : str
            A path to a configured Comet object file.
        clusterer: list
            A length 2 list containing in position 0 the name of the clusterer, and
            in position 1 the parameters to configure it.
            *Example*
            clusterer = ["HDBSCAN", {"minDist":0.1}]
        N: int
            subsampling iterations (default 100) for estimating scale and resolutions.
        beta: float
            exponent parameter (default 0.) for estimating scale and resolutions.
        C: float
            constant parameter (default 10.) for estimating scale and resolutions.
        """
        super().__init__(
            data_path=data_path,
            clean_path=clean_path,
            projection_path=projection_path,
        )
        self.N = N
        self.C = C
        self.beta = beta
        self.clusterer = get_clusterer(clusterer)

        # Forward N / beta / C to gudhi: they were previously stored on the
        # instance but never reached the mapper, so non-default values had
        # no effect. The defaults here match gudhi's own defaults.
        self.mapper = MapperComplex(
            input_type="point cloud",
            clustering=self.clusterer,
            N=self.N,
            beta=self.beta,
            C=self.C,
        )
        self.starGraph = None
        self.complex = None
        self.nodes = None

    def fit(self, labels=None):
        """Constructs a cosmic Graph using gudhi's MapperComplex.

        Parameters
        ----------
        labels : array-like, optional
            Passed to the mapper as ``colors``. When provided and non-empty,
            the networkx graph is built with attributes taken from them.

        Returns
        ------
        None
            Initializes starGraph member

        Raises
        ------
        ValueError
            If the fitted graph has no nodes.

        Warning
        ------
        Particular combinations of parameters can result in empty graphs or
        empty complexes.

        """
        # bool(labels) is ambiguous (raises) for multi-element numpy arrays
        # and pandas objects, so test for presence explicitly instead.
        has_labels = labels is not None and len(labels) > 0

        self.mapper.fit(X=self.projection, filters=self.projection, colors=labels)
        graph = self.mapper.get_networkx(set_attributes_from_colors=has_labels)

        # Every edge gets the same unit weight.
        nx.set_edge_attributes(graph, 1, "weight")

        # NOTE(review): this relies on the graph returned by gudhi carrying a
        # "membership" node attribute -- confirm against the installed gudhi
        # version; if it is absent the node dict below is empty.
        membership = nx.get_node_attributes(graph, "membership")
        self.complex = {"nodes": membership}
        self.nodes = convert_keys_to_alphabet(membership)

        # Pair old and new keys positionally to rename the graph's nodes.
        relabel_map = dict(zip(membership, self.nodes))
        graph = nx.relabel_nodes(graph, relabel_map)
        nx.set_node_attributes(graph, self.nodes, "membership")
        # Update complex to use the new alphabetic keys (use copy to avoid reference issues)
        self.complex["nodes"] = self.nodes.copy()

        if len(graph) == 0:
            raise ValueError("Empty graph")

        self.starGraph = starGraph(graph)

    def get_pseudoLaplacian(self, neighborhood="node"):
        """Calculates and returns a pseudo laplacian n by n matrix representing neighborhoods in the graph. Here, n corresponds to
        the number of items (ie rows in the clean data - keep in mind some raw data rows may have been dropped in cleaning). Here,
        the diagonal element A_ii represents the number of neighborhoods item i appears in. The element A_ij represent the number of
        neighborhoods both item i and j belong to.

        If the star has not been fitted yet, ``fit()`` is called first with
        its default arguments.

        Parameters
        ----------
        neighborhood: str
            Specifies the type of neighborhood. For gudhiStar, neighborhood options are 'node' or 'cc'
        """
        if self.starGraph is None:
            self.fit()

        return mapper_pseudo_laplacian(
            complex=self.complex,
            n=len(self.clean),
            components=self.starGraph.components,
            neighborhood=neighborhood,
        )

    def get_unclustered_items(self):
        """Returns the list of items that were not clustered in the mapper fitting.

        If ``fit`` has never been run (``nodes`` is still ``None``), it is
        called first with its default arguments, mirroring
        ``get_pseudoLaplacian``.

        Returns
        -------
        list
           A list of unclustered item ids
        """
        if self.nodes is None:
            self.fit()

        return mapper_unclustered_items(len(self.clean), self.nodes)