Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 62823a7

Browse files
committed Mar 15, 2022
Add calculate_cng_indices
1 parent 94ac334 commit 62823a7

File tree

1 file changed

+67
-1
lines changed

1 file changed

+67
-1
lines changed
 

‎factor_analyzer/factor_analyzer.py

+67-1
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,21 @@
88
"""
99

1010
import warnings
11+
from typing import Tuple
1112

1213
import numpy as np
1314
import pandas as pd
1415
import scipy as sp
1516
from scipy.optimize import minimize
1617
from scipy.stats import chi2, pearsonr
1718
from sklearn.base import BaseEstimator, TransformerMixin
19+
from sklearn.linear_model import LinearRegression
1820
from sklearn.utils import check_array
1921
from sklearn.utils.extmath import randomized_svd
2022
from sklearn.utils.validation import check_is_fitted
2123

2224
from .rotator import OBLIQUE_ROTATIONS, POSSIBLE_ROTATIONS, Rotator
23-
from .utils import corr, impute_values, partial_correlations, smc
25+
from .utils import corr, covariance_to_correlation, impute_values, partial_correlations, smc
2426

2527
# Names of the SVD methods listed as available.
# NOTE(review): presumably validated against a user-supplied SVD option
# elsewhere in this file -- confirm against the FactorAnalyzer class.
POSSIBLE_SVDS = ['randomized', 'lapack']
2628

@@ -114,6 +116,70 @@ def calculate_bartlett_sphericity(x):
114116
return statistic, p_value
115117

116118

119+
def calculate_cng_indices(
    data: np.ndarray, model: str = "components"
) -> Tuple[int, pd.DataFrame]:
    """
    Calculate the Cattell-Nelson-Gorsuch (CNG) indices.

    The CNG indices are used to determine the appropriate number of
    factors for a factor analysis. For every position ``i`` in the
    descending eigenvalue sequence, a straight line is fitted to the three
    eigenvalues starting at ``i`` and another to the three eigenvalues that
    follow; the CNG index is the difference between the two slopes
    (second minus first). The position with the largest index marks the
    "elbow" of the scree curve.

    Port of the ``nCng`` function from the R ``nFactors`` package:
    https://rdrr.io/cran/nFactors/man/nCng.html

    Parameters
    ----------
    data : array-like
        The samples x observables data (NumPy array or pandas DataFrame)
        for which to calculate the CNG indices.
    model : str
        ``"components"`` or ``"factors"``. With ``"factors"`` the
        correlation matrix is adjusted before the eigenvalues are computed;
        any other value is treated like ``"components"``.

    Returns
    -------
    num_factors : int
        The number of components/factors to retain.
    details : pd.DataFrame
        One row per CNG index, with the eigenvalue at that position
        (column ``"data"``) and the index itself (column ``"cng"``).

    Raises
    ------
    ValueError
        If the data set has fewer than 6 variables.
    """
    # Number of consecutive eigenvalues used for each straight-line fit.
    fit_size = 3

    # ``np.asarray`` accepts DataFrames as well as plain arrays; the previous
    # ``data.values`` only worked for pandas objects.
    correlation = corr(np.asarray(data))

    # Validate before doing any of the more expensive linear algebra.
    num_variables = len(correlation)
    if num_variables < 6:
        raise ValueError("The number of variables must be at least 6")

    if model == "factors":
        # Subtract the diagonal matrix built from the inverse of the
        # diagonal of the (pseudo-)inverse correlation matrix.
        inverse_diagonal = np.diag(np.diag(np.linalg.pinv(correlation)))
        correlation = correlation - np.linalg.pinv(inverse_diagonal)
        # TODO: Should this line be here?
        correlation = covariance_to_correlation(correlation)

    # The matrix is symmetric, so ``eigvalsh`` is the appropriate routine: it
    # always returns real eigenvalues (in ascending order), whereas
    # ``eigvals`` may return a complex dtype that the regression cannot use.
    values = np.linalg.eigvalsh(correlation)[::-1]

    # Slope of every window of ``fit_size`` consecutive eigenvalues; each
    # window is fitted exactly once.
    slopes = np.array(
        [
            _cng_window_slope(values, start, fit_size)
            for start in range(num_variables - fit_size + 1)
        ]
    )

    # CNG index at position i = slope of the window that follows minus the
    # slope of the window starting at i. This yields
    # ``num_variables - 2 * fit_size + 1`` indices, so the last pair of
    # windows is included and exactly 6 variables give a single index.
    cng = slopes[fit_size:] - slopes[:-fit_size]

    num_factors = int(np.nanargmax(cng)) + fit_size

    details = pd.DataFrame(
        {
            "data": values[: len(cng)],
            "cng": cng,
        }
    )

    return num_factors, details


def _cng_window_slope(values: np.ndarray, start: int, size: int) -> float:
    """Return the least-squares slope of ``values[start:start + size]`` against its indices."""
    positions = np.arange(start, start + size)
    regression = LinearRegression().fit(positions[:, np.newaxis], values[positions])
    return float(regression.coef_[0])
181+
182+
117183
class FactorAnalyzer(BaseEstimator, TransformerMixin):
118184
"""
119185
A FactorAnalyzer class, which -

0 commit comments

Comments
 (0)
Please sign in to comment.