Coverage for sleapyfaces/normalize.py: 48%
25 statements
« prev ^ index » next coverage.py v7.0.2, created at 2023-01-03 12:07 -0800
« prev ^ index » next coverage.py v7.0.2, created at 2023-01-03 12:07 -0800
1import pandas as pd
2from sklearn.decomposition import PCA
def mean_center(data: pd.DataFrame, track_names: list[str]) -> pd.DataFrame:
    """Mean center the selected tracks.

    Each column named in ``track_names`` has its own mean subtracted from
    it. Every other column is left untouched.

    Note:
        ``data`` is modified in place and the same object is returned.

    Args:
        data (pd.DataFrame): The data to be mean centered.
        track_names (list[str]): The names of the tracks to mean center.

    Returns:
        pd.DataFrame: ``data`` itself, with the selected tracks mean centered.
    """
    # Nothing to center; avoid assigning through an empty column selection.
    if len(track_names) == 0:
        return data

    tracks = data.loc[:, track_names]
    centered = tracks - tracks.mean()

    # Replace the columns instead of writing through ``.loc[:, cols]``.
    # ``.loc`` tries to store the float results in the existing column
    # buffers, so integer tracks depend on implicit upcasting, which pandas
    # has deprecated. Column replacement lets the dtype change cleanly while
    # still updating the caller's frame.
    data[track_names] = centered
    return data
def z_score(data: pd.DataFrame, track_names: list[str]) -> pd.DataFrame:
    """Standardize the selected tracks.

    The tracks are first mean centered via ``mean_center`` and then each
    track is divided by its own standard deviation, as computed by
    ``Series.std``.

    Args:
        data (pd.DataFrame): The data to be z-scored.
        track_names (list[str]): The names of the tracks to be z-scored.

    Returns:
        pd.DataFrame: The frame returned by ``mean_center`` with every
            selected track scaled by its standard deviation.
    """
    centered = mean_center(data, track_names)
    for name in track_names:
        # Scale one track at a time, writing the result back into the frame.
        column = centered.loc[:, name]
        centered.loc[:, name] = column / column.std()
    return centered
def pca(data: pd.DataFrame, track_names: list[str]) -> dict[str, pd.DataFrame]:
    """Run 2D and 3D PCA dimensionality reduction on the selected tracks.

    A separate ``PCA`` model is fitted for each target dimensionality. The
    columns not listed in ``track_names`` are carried over next to the
    principal components. Their index is reset first so that they line up
    with the freshly indexed component frames.

    Args:
        data (pd.DataFrame): The data to be reduced.
        track_names (list[str]): The names of the tracks to be reduced.

    Returns:
        dict[str, pd.DataFrame]: The reduced data with keys "pca2d" and
            "pca3d". Each frame holds the non-track columns followed by
            "principal component 1" ... "principal component N".
    """
    features = data.loc[:, track_names]
    metadata = data.drop(columns=track_names).reset_index()

    reduced = {}
    # Fit the 2D model before the 3D one, as the result keys are ordered.
    for key, n_components in (("pca2d", 2), ("pca3d", 3)):
        labels = [
            f"principal component {number}"
            for number in range(1, n_components + 1)
        ]
        scores = PCA(n_components=n_components).fit_transform(features)
        components = pd.DataFrame(scores, columns=labels)
        reduced[key] = pd.concat([metadata, components], axis=1)

    return reduced