71 lines
1.9 KiB
Python
71 lines
1.9 KiB
Python
import numpy as np
|
|
from matplotlib import pyplot as plt
|
|
from sklearn.decomposition import PCA
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
|
# Print arrays on very wide lines and show four leading/trailing items per
# axis before numpy abbreviates the middle with "...".
np.set_printoptions(
    edgeitems=4,
    linewidth=1000,
)
|
|
|
|
|
|
def print_heading(text: str) -> None:
    """Print *text* as a section banner.

    The text is wrapped in single spaces and centred in an 80-character
    line padded with dashes. Text longer than the banner is printed as-is
    (``str.center`` never truncates).

    Args:
        text: Heading to display.
    """
    banner = f" {text} ".center(80, "-")
    print(banner)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Load
# ---------------------------------------------------------------------------
# Numeric table read with np.loadtxt's defaults (whitespace-separated).
data = np.loadtxt("data.txt")

# Names used for the axis ticks of the final matrix plot.
# NOTE(review): assumes data.txt has exactly one column per label, in this
# order -- confirm against the file that produces data.txt.
labels = [
    "score",
    "age",
    "view_count",
    "body_length",
    "answer_count",
    "comment_count",
    "favourite_count",
]

print_heading("raw data")
print(data)

# ---------------------------------------------------------------------------
# Standardise
# ---------------------------------------------------------------------------
print_heading("scaled")

# Centre and scale every column; the fitted scaler is kept so the scaling
# can be undone after the PCA round trip below.
scaler = StandardScaler()
x = scaler.fit_transform(data)
print(x)

# ---------------------------------------------------------------------------
# Fit PCA
# ---------------------------------------------------------------------------
# Keep one component per labelled feature (7 with the labels above) instead
# of hard-coding the count.
n = len(labels)
pca = PCA(n_components=n)
pca.fit(x)

print_heading("components")
print(pca.components_.shape)  # (n_components, n_features)
print(pca.components_)  # principal axes: eigenvectors of the covariance matrix

print_heading("explained_variance")
print(pca.explained_variance_)  # n largest eigenvalues of the covariance matrix
print(pca.explained_variance_ratio_, "(as ratio)")

# ---------------------------------------------------------------------------
# Covariance: model estimate vs. direct sample covariance
# ---------------------------------------------------------------------------
print_heading("covariance")

# rowvar=False: columns are the variables (equivalent to np.cov(x.T)).
cov = np.cov(x, rowvar=False)
model_cov = pca.get_covariance()

print(model_cov)
print(np.allclose(model_cov, cov))  # True when the model reproduces np.cov
print(model_cov.shape)  # (n_features, n_features)
print(model_cov)

# ---------------------------------------------------------------------------
# Project, then map back to the original units
# ---------------------------------------------------------------------------
print_heading("transformed")

x_new = pca.transform(x)
print(x_new.shape)
print(x_new)

print_heading("inverse transformed and undone scale")

# Undo the projection first, then the standardisation.
x_reconstructed_scaled = pca.inverse_transform(x_new)
x_simple = scaler.inverse_transform(x_reconstructed_scaled)

print(x_simple.shape)
print(x_simple)

print(pca.explained_variance_)

# ---------------------------------------------------------------------------
# Plots
# ---------------------------------------------------------------------------
# Original vs. reconstructed points for columns 0 and 4
# ("score" and "answer_count" in `labels`).
x_col, y_col = 0, 4
plt.scatter(data[:, x_col], data[:, y_col], s=1)
plt.scatter(x_simple[:, x_col], x_simple[:, y_col], s=1)
plt.show()

# Cumulative share of variance explained by the leading components.
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.show()

# Covariance matrix of the standardised data as estimated by the PCA model
# (close to, but not exactly, the correlation matrix of the raw data).
cov = model_cov
plt.matshow(cov)
tick_positions = range(len(labels))
plt.xticks(tick_positions, labels, rotation=90)
plt.yticks(tick_positions, labels)
plt.colorbar()

plt.show()
|