Source code for thema.multiverse.universe.stars.gudhiStar
# File: multiverse/universe/stars/gudhiStar.py
# Last Update: 11-19-25
# Updated by: JW
import networkx as nx
from gudhi.cover_complex import MapperComplex
from ..star import Star
from ..utils.starHelpers import (
convert_keys_to_alphabet,
get_clusterer,
mapper_unclustered_items,
mapper_pseudo_laplacian,
)
from ..utils.starGraph import starGraph
[docs]
def initialize():
    """Return the star class implemented by this module.

    Returns
    -------
    type
        The ``gudhiStar`` class itself (not an instance).
    """
    star_class = gudhiStar
    return star_class
[docs]
class gudhiStar(Star):
    """
    GUDHI Star Class
    ----------------
    - inherits from Star

    Generates a graph representation of a projection using gudhi's
    MapperComplex.
    See: https://gudhi.inria.fr/python/latest/cover_complex_sklearn_isk_ref.html

    Members
    -------
    data: pd.DataFrame
        a pandas dataframe of raw data
    clean: pd.DataFrame
        a pandas dataframe of complete, scaled, and encoded data
    projection: np.ndarray
        a numpy array containing projection coordinates
    N, beta, C: int, float, float
        parameters forwarded to MapperComplex for estimating scale and
        resolutions
    clusterer:
        the clusterer object returned by ``get_clusterer`` for the
        ``[name, kwargs]`` specification given to the constructor
    mapper: gudhi.cover_complex.MapperComplex
        a mapper object
    complex: dict
        ``{"nodes": membership}`` where membership is keyed by the
        alphabetic node labels; None until ``fit`` has run
    nodes: dict
        node membership keyed by alphabetic node labels; None until
        ``fit`` has run
    starGraph: thema.multiverse.universe.starGraph class
        An expanded framework for analyzing networkx graphs; None until
        ``fit`` has produced a non-empty graph

    Functions
    ---------
    (the path getters and ``save`` are not defined in this class; they
    are presumably inherited from Star)

    get_data_path() -> str
        returns path to raw data
    get_clean_path() -> str
        returns path to Moon object containing clean data
    get_projection_path() -> str
        returns path to Comet object containing projection data
    fit() -> None
        Computes a complex and corresponding starGraph
    get_pseudoLaplacian() -> matrix
        returns the pseudo laplacian of the fitted complex
    get_unclustered_items() -> list
        returns list of items that do not appear in any mapper node
    save() -> None
        Saves object as a .pkl file.
    """

    def __init__(
        self,
        data_path: str,
        clean_path: str,
        projection_path: str,
        clusterer: list,
        N: int = 100,
        beta: float = 0.0,
        C: float = 10.0,
    ):
        """
        Constructs an instance of gudhiStar

        Parameters
        ----------
        data_path : str
            A path to the raw data file.
        clean_path : str
            A path to a configured Moon object file.
        projection_path : str
            A path to a configured Comet object file.
        clusterer: list
            A length 2 list containing in position 0 the name of the
            clusterer, and in position 1 the parameters to configure it.
            *Example*
            clusterer = ["HDBSCAN", {"minDist":0.1}]
        N: int
            subsampling iterations (default 100) for estimating scale and
            resolutions.
        beta: float
            exponent parameter (default 0.) for estimating scale and
            resolutions.
        C: float
            constant parameter (default 10.) for estimating scale and
            resolutions.
        """
        super().__init__(
            data_path=data_path,
            clean_path=clean_path,
            projection_path=projection_path,
        )
        self.N = N
        self.C = C
        self.beta = beta
        self.clusterer = get_clusterer(clusterer)
        # Forward the estimation parameters: previously they were stored on
        # the instance but never reached MapperComplex, so non-default
        # values had no effect.
        self.mapper = MapperComplex(
            input_type="point cloud",
            clustering=self.clusterer,
            N=self.N,
            beta=self.beta,
            C=self.C,
        )
        # Populated by fit().
        self.starGraph = None
        self.complex = None
        self.nodes = None

    def fit(self, labels=None):
        """Constructs a cosmic Graph using gudhi's MapperComplex.

        The projection is used both as the point cloud and as the filter
        values. Every edge of the resulting graph gets ``weight = 1`` and
        nodes are relabelled with the keys produced by
        ``convert_keys_to_alphabet``.

        Parameters
        ----------
        labels : array-like, optional
            Passed to ``MapperComplex.fit`` as ``colors``. When provided
            (not None and non-empty) node attributes are also set from the
            colors in the networkx graph.

        Returns
        -------
        None
            Initializes the ``complex``, ``nodes`` and ``starGraph`` members.

        Raises
        ------
        ValueError
            If the fitted graph has no nodes.

        Warning
        -------
        Particular combinations of parameters can result in empty graphs or
        empty complexes.
        """
        self.mapper.fit(X=self.projection, filters=self.projection, colors=labels)

        # bool(labels) is ambiguous (raises ValueError) for multi-element
        # numpy arrays, so test for presence explicitly instead.
        has_labels = labels is not None and len(labels) > 0
        graph = self.mapper.get_networkx(set_attributes_from_colors=has_labels)

        # Unweighted complex: give every edge unit weight.
        nx.set_edge_attributes(graph, 1, "weight")

        membership = nx.get_node_attributes(graph, "membership")
        self.complex = {"nodes": membership}
        self.nodes = convert_keys_to_alphabet(membership)

        # Pair old and new keys positionally; this relies on
        # convert_keys_to_alphabet preserving the key order of its input.
        relabel_map = dict(zip(membership, self.nodes))
        graph = nx.relabel_nodes(graph, relabel_map)
        nx.set_node_attributes(graph, self.nodes, "membership")

        # Update complex to use the new alphabetic keys (use copy to avoid
        # reference issues)
        self.complex["nodes"] = self.nodes.copy()

        if len(graph) == 0:
            raise ValueError("Empty graph")
        self.starGraph = starGraph(graph)

    def get_pseudoLaplacian(self, neighborhood="node"):
        """Calculates and returns a pseudo laplacian n by n matrix
        representing neighborhoods in the graph.

        Here, n corresponds to the number of items (ie rows in the clean
        data - keep in mind some raw data rows may have been dropped in
        cleaning). The diagonal element A_ii represents the number of
        neighborhoods item i appears in. The element A_ij represents the
        number of neighborhoods both item i and j belong to.

        If the star has not been fitted yet, ``fit()`` is called first (and
        may raise ``ValueError`` on an empty graph).

        Parameters
        ----------
        neighborhood: str
            Specifies the type of neighborhood. Options are 'node' or 'cc'.

        Returns
        -------
        The result of ``mapper_pseudo_laplacian`` for the fitted complex.
        """
        if self.starGraph is None:
            self.fit()
        return mapper_pseudo_laplacian(
            complex=self.complex,
            n=len(self.clean),
            components=self.starGraph.components,
            neighborhood=neighborhood,
        )

    def get_unclustered_items(self):
        """Returns the list of items that were not clustered in the mapper
        fitting.

        If the star has not been fitted yet, ``fit()`` is called first (and
        may raise ``ValueError`` on an empty graph), mirroring
        ``get_pseudoLaplacian``.

        Returns
        -------
        list
            A list of unclustered item ids, as computed by
            ``mapper_unclustered_items``.
        """
        if self.nodes is None:
            # nodes is only None before the first fit; fit lazily rather
            # than handing None to the helper.
            self.fit()
        return mapper_unclustered_items(len(self.clean), self.nodes)