Source code for thema.multiverse.system.outer.projectiles.pcaProj

# File: multiverse/system/outer/projectiles/pcaProj.py
# Last Update: 05/15/24
# Updated by: JW

from sklearn.decomposition import PCA

from ..comet import Comet


def initialize():
    """
    Hand back the pcaProj class itself (not an instance).

    Acts as a generic entry point so that arbitrary projectile objects
    can be initialized through the same module-level hook.

    Returns
    -------
    pcaProj : object
        The PCA projectile class object.
    """
    return pcaProj
class pcaProj(Comet):
    """
    PCA projectile: a Comet subclass that embeds data with sklearn's PCA.

    The cleaned data is projected into a lower dimensional space using
    ``sklearn.decomposition.PCA``. See:
    https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

    Attributes
    ----------
    data : pd.DataFrame
        A pandas dataframe of raw data (expected to be provided by the
        Comet base class; not set in this class).
    clean : pd.DataFrame
        A pandas dataframe of complete, encoded, and scaled data (expected
        to be provided by the Comet base class; not set in this class).
    projectionArray : np.array
        The projection array; ``None`` until `fit` has been called.
    dimensions : int
        Number of dimensions for the embedding.
    seed : int
        Seed for randomization.

    Methods
    -------
    __init__(self, data_path, clean_path, dimensions, seed)
        Constructs a pcaProj instance.
    fit(self)
        Performs a PCA projection based on the configuration parameters.
    save(self)
        Saves pcaProj to a `.pkl` serialized object file (not defined in
        this class; presumably inherited from Comet).
    """

    def __init__(self, data_path, clean_path, dimensions, seed):
        """
        Build a pcaProj instance and record its projection settings.

        Parameters
        ----------
        data_path : str
            The path to the data file.
        clean_path : str
            The path to save the cleaned data file.
        dimensions : int
            The number of dimensions for the embedding.
        seed : int
            The seed for randomization.
        """
        # Both paths are forwarded to the Comet base class by keyword.
        super().__init__(data_path=data_path, clean_path=clean_path)
        self.dimensions = dimensions
        self.seed = seed
        # No projection exists until fit() is run.
        self.projectionArray = None

    def fit(self):
        """
        Run the PCA projection using the stored configuration.

        A PCA model is created with ``dimensions`` components and ``seed``
        as its random state, then fitted to and applied on ``self.clean``.

        Returns
        -------
        None
            The result is stored in the ``projectionArray`` member.
        """
        projector = PCA(n_components=self.dimensions, random_state=self.seed)
        self.projectionArray = projector.fit_transform(self.clean)