Source code for thema.multiverse.universe.stars.jmapStar

# File: multiverse/universe/stars/jmapStar.py
# Last Update: 05/15/24
# Updated by: JW


import logging


import networkx as nx
from kmapper import Cover, KeplerMapper


from ..star import Star
from ..utils.starGraph import starGraph
from ..utils.starHelpers import (
    convert_keys_to_alphabet,
    mapper_pseudo_laplacian,
    mapper_unclustered_items,
    get_clusterer,
    Nerve,
)

# Configure module logger
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


def initialize():
    """
    Return the jmapStar class defined in this module.

    This is a general entry point that allows arbitrary star objects to be
    initialized: the caller receives the class and constructs the star
    itself.

    Returns
    -------
    jmapStar : class
        The jmapStar class itself (not an instance).
    """
    star_class = jmapStar
    return star_class
class jmapStar(Star):
    """
    JMAP Star Class

    Our custom implementation of a Kepler Mapper (K-Mapper) into a Star
    object. Here we allow users to explore the topological structure of
    their data using the Mapper algorithm, which is a powerful tool for
    visualizing high-dimensional data.

    - inherits from Star

    Generates a graph representation of a projection using Kepler Mapper.

    Members
    -------
    data: pd.DataFrame
        a pandas dataframe of raw data
    clean: pd.DataFrame
        a pandas dataframe of complete, scaled, and encoded data
    projection: np.ndarray
        a numpy array containing projection coordinates
    nCubes: int
        kmapper parameter relating to covering of space
    percOverlap: float
        kmapper parameter relating to covering of space
    minIntersection: int
        number of shared items required to define an edge. Set to -1 to
        create a weighted graph.
    clusterer: function
        Clustering function passed to kmapper (e.g. HDBSCAN).
    mapper: kmapper.mapper
        A kmapper mapper object.
    complex: dict
        A dictionary specifying node membership. None until `fit` succeeds.
    nodes: dict
        Node membership of the complex with relabelled keys. None until
        `fit` succeeds.
    edges:
        Edges computed by the Nerve from `nodes`. None until `fit` succeeds.
    starGraph: thema.multiverse.universe.utils.starGraph class
        An expanded framework for analyzing networkx graphs

    Functions
    ---------
    fit() -> None
        Computes a complex and corresponding starGraph
    get_pseudoLaplacian(neighborhood) -> matrix
        returns the item-by-item neighborhood co-membership matrix
    get_unclustered_items() -> list
        returns list of unclustered items from HDBSCAN

    The following are not defined in this class (inherited):

    get_data_path() -> str
        returns path to raw data
    get_clean_path() -> str
        returns path to Moon object containing clean data
    get_projection_path() -> str
        returns path to Comet object containing projection data
    save() -> None
        Saves object as a .pkl file.
    """

    def __init__(
        self,
        data_path: str,
        clean_path: str,
        projection_path: str,
        nCubes: int,
        percOverlap: float,
        minIntersection: int,
        clusterer: list,
    ):
        """
        Constructs an instance of jmapStar

        Parameters
        ----------
        data_path : str
            A path to the raw data file.
        clean_path : str
            A path to a configured Moon object file.
        projection_path : str
            A path to a configured Comet object file.
        nCubes: int
            Number of cubes used in kmapper cover.
        percOverlap: float
            Percent of cube overlap in kmapper cover.
        minIntersection: int
            Number of shared items across nodes to define an edge.
            Note: set to -1 for a weighted graph.
        clusterer: list
            A length 2 list containing in position 0 the name of the
            clusterer, and in position 1 the parameters to configure it.
            *Example* clusterer = ["HDBSCAN", {"minDist":0.1}]
        """
        super().__init__(
            data_path=data_path,
            clean_path=clean_path,
            projection_path=projection_path,
        )
        self.nCubes = nCubes
        self.percOverlap = percOverlap
        self.minIntersection = minIntersection
        self.clusterer = get_clusterer(clusterer)
        self.mapper = KeplerMapper()

        # Fit results: defined up front so accessors can tell "not fitted"
        # apart from a missing attribute.
        self.complex = None
        self.nodes = None
        self.edges = None

        # Store parameters for potential debugging
        self._params = {
            "nCubes": nCubes,
            "percOverlap": percOverlap,
            "minIntersection": minIntersection,
            "clusterer": clusterer,
        }

    def fit(self):
        """Computes a kmapper complex based on the configuration parameters
        and constructs a resulting graph.

        Returns
        -------
        None
            Initializes the complex, nodes, edges and starGraph members.

        Warning
        -------
        Particular combinations of parameters can result in empty graphs or
        empty complexes. For an empty complex, `complex`, `nodes`, `edges`
        and `starGraph` are all set to None. When the complex has nodes but
        no edges, `starGraph` wraps an empty graph (no nodes are added).
        """
        self.complex = self.mapper.map(
            lens=self.projection,
            X=self.projection,
            cover=Cover(self.nCubes, self.percOverlap),
            clusterer=self.clusterer,
        )

        if not self.complex or "nodes" not in self.complex:
            logger.debug(
                "KeplerMapper produced empty complex - params: %s, "
                "projection shape: %s",
                self._params,
                self.projection.shape,
            )
            self.complex = None
            self.starGraph = None
            # Drop results of any earlier successful fit so the object never
            # reports nodes/edges that do not match its current complex.
            self.nodes = None
            self.edges = None
            return

        self.nodes = convert_keys_to_alphabet(self.complex["nodes"])
        graph = nx.Graph()

        # Fit Nerve to generate edges
        nerve = Nerve(minIntersection=self.minIntersection)
        self.edges = nerve.compute(self.nodes)

        if len(self.edges) == 0:
            # Log when we get empty graphs - this is important for debugging
            logger.debug(
                "No edges found in graph - params: %s, "
                "nodes: %s, projection shape: %s",
                self._params,
                len(self.nodes),
                self.projection.shape,
            )
            # Create empty graph instead of None
            self.starGraph = starGraph(graph)
            return

        graph.add_nodes_from(self.nodes)
        nx.set_node_attributes(graph, self.nodes, "membership")
        # minIntersection == -1 is the sentinel for a weighted graph.
        if self.minIntersection == -1:
            graph.add_weighted_edges_from(self.edges)
        else:
            graph.add_edges_from(self.edges)
        self.starGraph = starGraph(graph)

    def get_pseudoLaplacian(self, neighborhood="node"):
        """Calculates and returns a pseudo laplacian n by n matrix
        representing neighborhoods in the graph.

        Here, n corresponds to the number of items (ie rows in the clean
        data - keep in mind some raw data rows may have been dropped in
        cleaning). The diagonal element A_ii represents the number of
        neighborhoods item i appears in. The element A_ij represents the
        number of neighborhoods both item i and j belong to.

        If the star has not been fitted yet, `fit` is called first.

        Parameters
        ----------
        neighborhood: str
            Specifies the type of neighborhood. For jmapStar, neighborhood
            options are 'node' or 'cc'

        Returns
        -------
        The matrix produced by `mapper_pseudo_laplacian`.

        Raises
        ------
        AttributeError
            If fitting produces an empty complex, so there is no starGraph
            to take components from.
        """
        if self.complex is None:
            self.fit()

        if self.complex is None or getattr(self, "starGraph", None) is None:
            # AttributeError (rather than e.g. ValueError) is kept on purpose:
            # it is what this path raised before, only with an opaque
            # "'NoneType' object has no attribute 'components'" message.
            raise AttributeError(
                "Cannot compute pseudo laplacian: fit() produced an empty "
                f"complex for params {self._params}"
            )

        return mapper_pseudo_laplacian(
            complex=self.complex,
            n=len(self.clean),
            components=self.starGraph.components,
            neighborhood=neighborhood,
        )

    def get_unclustered_items(self):
        """
        Returns the list of items that were not clustered in the mapper
        fitting.

        If the star has not been fitted yet, `fit` is called first.

        Returns
        -------
        list
            A list of unclustered item ids

        Raises
        ------
        AttributeError
            If fitting produces an empty complex, so no node membership is
            available.
        """
        # getattr also covers instances created without __init__
        # (e.g. objects restored from files written by an older version).
        if getattr(self, "nodes", None) is None:
            self.fit()

        if self.nodes is None:
            # Same exception type as the former "no attribute 'nodes'" crash.
            raise AttributeError(
                "Cannot compute unclustered items: fit() produced an empty "
                f"complex for params {self._params}"
            )

        return mapper_unclustered_items(len(self.clean), self.nodes)