Andrew Ng's Machine Learning programming assignment in Python: ex7_pca

# -*- coding: utf-8 -*-
"""
Created on Fri Jul  3 23:50:18 2020

@author: cheetah023
"""

import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sci

def featureNormalize(X):
    # Normalize each feature to zero mean and unit standard deviation
    mu = np.mean(X,axis=0)
    sigma = np.std(X,axis=0,ddof=1)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma
def pca(X):
    # Covariance matrix and its SVD: the columns of U are the principal
    # components, S holds the corresponding singular values
    sigma = np.dot(X.T, X) / X.shape[0]
    U,S,V = np.linalg.svd(sigma)
    return U,S
def projectData(X, U, K):
    # Project the data onto the top K principal components
    Z = np.dot(X,U[:,0:K])
    return Z
def recoverData(Z, U, K):
    # Map the projected data back to the original feature space
    X = np.dot(Z,U[:,0:K].T)
    return X
def displayData(X, row, col):
    # Show the first row*col examples as 32x32 grayscale images in a subplot grid
    fig, axs = plt.subplots(row, col, figsize=(8,8))
    for r in range(row):
        for c in range(col):
            axs[r][c].imshow(X[r*col + c].reshape(32,32).T, cmap = 'Greys_r')
            axs[r][c].set_xticks([])
            axs[r][c].set_yticks([])
#Part 1: Load Example Dataset
data = sci.loadmat('ex7data1.mat')
#print('data.keys',data.keys())
X = data['X']
print('X',X.shape)
plt.figure(0)
plt.scatter(X[:,0],X[:,1],marker='o',edgecolors='b', facecolors='none')

#Part 2: Principal Component Analysis
[X_norm, mu, sigma] = featureNormalize(X)
#print('mu',mu)
#print('sigma',sigma)
#print('X_norm',X_norm)
[U, S] = pca(X_norm)
print('U:',U.shape)
print('S:',S.shape)
# Draw the principal component directions scaled by their singular values
# (each direction is a column of U, so the y component is U[1,k], not U[k,1])
plt.plot([mu[0],mu[0] + 1.5 * S[0] * U[0,0]],
         [mu[1],mu[1] + 1.5 * S[0] * U[1,0]],c='r')
plt.plot([mu[0],mu[0] + 1.5 * S[1] * U[0,1]],
         [mu[1],mu[1] + 1.5 * S[1] * U[1,1]],c='c')
print('Top eigenvector:')
print('U(:,0) =',U[0,0],U[1,0])
print('(you should expect to see -0.707107 -0.707107)')

#Part 3: Dimension Reduction
plt.figure(2)
plt.scatter(X_norm[:,0],X_norm[:,1],marker='o',edgecolors='b', facecolors='none')

K = 1
Z = projectData(X_norm, U, K)
print('\nProjection of the first example:', Z[0])
print('(this value should be about 1.481274)')

X_rec  = recoverData(Z, U, K)
print('\nApproximation of the first example:', X_rec[0,0], X_rec[0,1])
print('(this value should be about  -1.047419 -1.047419)')

plt.scatter(X_rec[:,0],X_rec[:,1],marker='o',edgecolors='r', facecolors='none')
for i in range(0,X.shape[0]):
    plt.plot([X_norm[i,0],X_rec[i,0]],[X_norm[i,1],X_rec[i,1]],'k--')

#Part 4: Loading and Visualizing Face Data
data = sci.loadmat('ex7faces.mat')
#print('data.keys',data.keys())
X = data['X']
print('X',X.shape)
plt.figure(3)
displayData(X,10,10)
plt.suptitle('original faces')

#Part 5: PCA on Face Data: Eigenfaces
[X_norm, mu, sigma] = featureNormalize(X)
[U, S] = pca(X_norm)
print('U:',U.shape)
print('S:',S.shape)
plt.figure(4)
displayData(U.T,6,6)
plt.suptitle('eigenvectors faces')

#Part 6: Dimension Reduction for Faces
K = 100
Z = projectData(X_norm, U, K)
print('The projected data Z has a size of',Z.shape)

#Part 7: Visualization of Faces after PCA Dimension Reduction
K = 100
X_rec  = recoverData(Z, U, K)
plt.figure(5)
displayData(X_rec,10,10)
plt.suptitle('Recovered faces')
plt.show()
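As a quick extension (not part of the original script): the singular values S returned by pca can also be used to check how much variance a given K retains, which is one way to justify the choice of K = 100 for the face data. A minimal sketch, with a hypothetical helper name varianceRetained:

# Sketch (not from the assignment): fraction of variance retained by the
# top K principal components, computed from the singular values S of pca()
def varianceRetained(S, K):
    return np.sum(S[0:K]) / np.sum(S)

#print('variance retained for K = 100:', varianceRetained(S, 100))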

Output:

X (50, 2)
U: (2, 2)
S: (2,)
Top eigenvector:
U(:,0) = -0.7071067811865475 -0.7071067811865477
(you should expect to see -0.707107 -0.707107)

Projection of the first example: [1.48127391]
(this value should be about 1.481274)

Approximation of the first example: -1.0474188259204964 -1.0474188259204966
(this value should be about -1.047419 -1.047419)
X (5000, 1024)
U: (1024, 1024)
S: (1024,)
The projected data Z has a size of (5000, 100)

Reference:

https://blog.csdn.net/Cowry5/article/details/80503380

Summary:

1. I still haven't fully understood the SVD algorithm; I'll dig into the details when I have time (see the first sketch after this summary).

2. I spent a long time on the function for drawing the face images, and in the end I used the displayData function from the reference, which uses subplots to draw each face as its own subplot (an alternative single-image approach is sketched below as well).
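On point 1, one numerical check that may help: for the symmetric covariance matrix, the SVD and the eigendecomposition agree, which is why pca() can use np.linalg.svd. This is only my own sketch with random stand-in data, not part of the assignment:

# Sketch: the covariance matrix Sigma is symmetric positive semi-definite,
# so its singular values equal its eigenvalues and the columns of U are its
# eigenvectors (up to sign).
import numpy as np

Xn = np.random.randn(50, 2)              # stand-in for the normalized data
Sigma = np.dot(Xn.T, Xn) / Xn.shape[0]   # covariance matrix, as in pca()
U, S, Vh = np.linalg.svd(Sigma)
w, v = np.linalg.eigh(Sigma)             # eigendecomposition for comparison

print(np.allclose(U @ np.diag(S) @ Vh, Sigma))   # SVD reconstructs Sigma
print(np.allclose(np.sort(w), np.sort(S)))       # eigenvalues == singular values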
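On point 2, a small alternative sketch (my own, with the hypothetical name displayDataTiled, and assuming the same 32x32 column-major image format as ex7faces.mat) that tiles the faces into one array and calls imshow once, instead of drawing one subplot per face:

# Alternative sketch to the subplot-based displayData: tile the first
# row*col faces into one big image and draw it with a single imshow call.
import numpy as np
import matplotlib.pyplot as plt

def displayDataTiled(X, row, col):
    # Each row of X is a 32x32 image stored column-major (MATLAB order),
    # hence the transpose of each tile.
    faces = X[:row * col].reshape(row, col, 32, 32)
    grid = np.block([[faces[r, c].T for c in range(col)] for r in range(row)])
    plt.imshow(grid, cmap='Greys_r')
    plt.axis('off')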