Commit 195f51c

committed Oct 30, 2016
Linear Regression

1 parent fde0b95 commit 195f51c
File tree

5 files changed: +240 −0 lines changed

LinearRegression/LinearRegression.py (+111 lines)
import numpy as np
from matplotlib import pyplot as plt


def linearRegression(alpha=0.01, num_iters=400):
    print("Loading data...\n")

    data = loadtxtAndcsv_data("data.txt", ",", np.float64)  # read the data
    X = data[:, 0:-1]       # X: every column except the last
    y = data[:, -1]         # y: the last column
    m = len(y)              # number of training examples
    col = data.shape[1]     # number of columns in data

    X, mu, sigma = featureNormaliza(X)  # feature normalization
    plot_X1_X2(X)           # plot the normalized features as a sanity check

    X = np.hstack((np.ones((m, 1)), X))  # prepend a column of ones to X

    print("\nRunning gradient descent...\n")

    theta = np.zeros((col, 1))
    y = y.reshape(-1, 1)    # reshape the row vector into a column vector
    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)

    plotJ(J_history, num_iters)

    return mu, sigma, theta  # return the mean mu, the standard deviation sigma, and the learned theta


# load a txt or csv file
def loadtxtAndcsv_data(fileName, split, dataType):
    return np.loadtxt(fileName, delimiter=split, dtype=dataType)


# load an npy file
def loadnpy_data(fileName):
    return np.load(fileName)


# feature normalization
def featureNormaliza(X):
    X_norm = np.array(X)        # copy X into a numpy array so the matrix operations work

    mu = np.mean(X_norm, 0)     # per-column mean (axis 0 = columns, 1 = rows)
    sigma = np.std(X_norm, 0)   # per-column standard deviation
    for i in range(X.shape[1]):  # iterate over the columns
        X_norm[:, i] = (X_norm[:, i] - mu[i]) / sigma[i]  # normalize

    return X_norm, mu, sigma


# scatter plot of the two features
def plot_X1_X2(X):
    plt.scatter(X[:, 0], X[:, 1])
    plt.show()


# gradient descent
def gradientDescent(X, y, theta, alpha, num_iters):
    m = len(y)
    n = len(theta)

    temp = np.zeros((n, num_iters))       # keep the theta of every iteration for inspection
    J_history = np.zeros((num_iters, 1))  # record the cost of every iteration

    for i in range(num_iters):
        h = np.dot(X, theta)  # hypothesis: the inner product of X and theta
        theta = theta - (alpha / m) * np.dot(np.transpose(X), h - y)  # gradient step
        temp[:, i] = theta.ravel()
        J_history[i] = computerCost(X, y, theta)  # evaluate the cost function
        print('.', end='')
    return theta, J_history


# compute the cost function
def computerCost(X, y, theta):
    # J = (X*theta - y)^T (X*theta - y) / (2m)
    m = len(y)
    diff = np.dot(X, theta) - y
    J = np.dot(np.transpose(diff), diff).item() / (2 * m)  # cost J as a scalar
    return J


# plot the cost of every iteration
def plotJ(J_history, num_iters):
    x = np.arange(1, num_iters + 1)
    plt.plot(x, J_history)
    plt.xlabel("num_iters")
    plt.ylabel("J")
    plt.show()


# test the linearRegression function
def testLinearRegression():
    mu, sigma, theta = linearRegression(0.01, 400)
    print("\nComputed theta:\n", theta)
    print("\nPrediction: %f" % predict(mu, sigma, theta))


# test the learned model (prediction)
def predict(mu, sigma, theta):
    # the new sample must be normalized with the training-set mu and sigma
    sample = np.array([1650, 3])
    norm_sample = (sample - mu) / sigma
    final_sample = np.hstack((np.ones(1), norm_sample))

    result = np.dot(final_sample, theta)  # predicted value
    return result[0]


if __name__ == "__main__":
    testLinearRegression()
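
The readme below mentions a normal-equation solver (normalEquations.m) for the MATLAB version of this exercise; the Python file in this commit does not port it. A minimal closed-form sketch, assuming numpy is imported as np and that X already carries the leading column of ones and y is a column vector, as inside linearRegression above:

def normalEquation(X, y):
    # closed form: theta = pinv(X^T X) X^T y; pinv guards against a singular X^T X
    return np.dot(np.linalg.pinv(np.dot(X.T, X)), np.dot(X.T, y))

Applied to the same normalized X and y, this should agree closely with the theta that gradient descent converges to.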

LinearRegression/data.csv (+47 lines)
2104,3,399900
1600,3,329900
2400,3,369000
1416,2,232000
3000,4,539900
1985,4,299900
1534,3,314900
1427,3,198999
1380,3,212000
1494,3,242500
1940,4,239999
2000,3,347000
1890,3,329999
4478,5,699900
1268,3,259900
2300,4,449900
1320,2,299900
1236,3,199900
2609,4,499998
3031,4,599000
1767,3,252900
1888,2,255000
1604,3,242900
1962,4,259900
3890,3,573900
1100,3,249900
1458,3,464500
2526,3,469000
2200,3,475000
2637,3,299900
1839,2,349900
1000,1,169900
2040,4,314900
3137,3,579900
1811,4,285900
1437,3,249900
1239,3,229900
2132,4,345000
4215,4,549000
2162,4,287000
1664,2,368500
2238,3,329900
2567,4,314000
1200,3,299000
852,2,179900
1852,4,299900
1203,3,239500

LinearRegression/data.npy (644 Bytes)

Binary file not shown.

LinearRegression/data.txt (+47 lines)
2104,3,399900
1600,3,329900
2400,3,369000
1416,2,232000
3000,4,539900
1985,4,299900
1534,3,314900
1427,3,198999
1380,3,212000
1494,3,242500
1940,4,239999
2000,3,347000
1890,3,329999
4478,5,699900
1268,3,259900
2300,4,449900
1320,2,299900
1236,3,199900
2609,4,499998
3031,4,599000
1767,3,252900
1888,2,255000
1604,3,242900
1962,4,259900
3890,3,573900
1100,3,249900
1458,3,464500
2526,3,469000
2200,3,475000
2637,3,299900
1839,2,349900
1000,1,169900
2040,4,314900
3137,3,579900
1811,4,285900
1437,3,249900
1239,3,229900
2132,4,345000
4215,4,549000
2162,4,287000
1664,2,368500
2238,3,329900
2567,4,314000
1200,3,299000
852,2,179900
1852,4,299900
1203,3,239500

LinearRegression/readme.md (+35 lines)
Linear Regression Algorithm
=======
### 1. File overview
- [main.m][1.1]
  - main driver script
- [featureNormalize.m][1.2]
  - feature-vector normalization function
- [gradientDescent.m][1.3]
  - gradient-descent solver (update rule shown after this list)
- [computerCost.m][1.4]
  - computes the cost J
- [normalEquations.m][1.5]
  - normal-equation solver

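For reference, the batch update that gradientDescent.m (and `gradientDescent` in the Python file above) applies on every iteration is, in vectorized form:

\theta := \theta - \frac{\alpha}{m} X^{T} (X\theta - y)

where m is the number of training examples and \alpha the learning rate.
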
### 2. Notes on the main files
- main.m
  - adjust the learning-rate parameter `alpha` sensibly
  - the data is normalized, so samples must be normalized again at prediction time (the theta parameters were learned from the normalized data); see the sketch below
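
A minimal sketch of that prediction step for the Python version, assuming `mu`, `sigma`, and `theta` are the values returned by `linearRegression` and `[1650, 3]` is a hypothetical new sample (it mirrors `predict` in LinearRegression.py):

    mu, sigma, theta = linearRegression(0.01, 400)
    sample = np.array([1650, 3])              # hypothetical house: 1650 sq ft, 3 bedrooms
    sample_norm = (sample - mu) / sigma       # normalize with the TRAINING mean and std
    x = np.hstack((np.ones(1), sample_norm))  # prepend the bias term
    price = np.dot(x, theta)                  # predicted price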

### 3. Test results
- the cost function converges as the iterations proceed
![Linear regression][3.1]
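
For reference, the cost being plotted is the ordinary squared-error objective that computerCost evaluates:

J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2, \qquad h_\theta(x) = \theta^{T} x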

[1.1]:main.m
[1.2]:featureNormalize.m
[1.3]:gradientDescent.m
[1.4]:computerCost.m
[1.5]:normalEquations.m

[3.1]: ../images/LinearRegression_01.png "LinearRegression_01.png"
