Skip to content

Commit 202ff3b

Browse files
author
cclauss
committed
Modernize Python 2 code to get ready for Python 3
1 parent 0fe8d2e commit 202ff3b

8 files changed

+43
-37
lines changed

AnomalyDetection/AnomalyDetection.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#-*- coding: utf-8 -*-
22
# Author: Bob
33
# Date: 2016.12.22
4+
from __future__ import print_function
45
import numpy as np
56
from matplotlib import pyplot as plt
67
from scipy import io as spio
@@ -25,8 +26,8 @@ def anomalyDetection_example():
2526
yval = data['yval'] # y=1代表异常
2627
pval = multivariateGaussian(Xval, mu, sigma2) # 计算CV上的概率密度值
2728
epsilon,F1 = selectThreshold(yval,pval) # 选择最优的epsilon临界值
28-
print u'在CV上得到的最好的epsilon是:%e'%epsilon
29-
print u'对应的F1Score值为:%f'%F1
29+
print(u'在CV上得到的最好的epsilon是:%e'%epsilon)
30+
print(u'对应的F1Score值为:%f'%F1)
3031
outliers = np.where(p<epsilon) # 找到小于临界值的异常点,并作图
3132
plt.plot(X[outliers,0],X[outliers,1],'o',markeredgecolor='r',markerfacecolor='w',markersize=10.)
3233
plt = display_2d_data(X, 'bx')

K-Means/K-Menas.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#-*- coding: utf-8 -*-
2+
from __future__ import print_function
23
import numpy as np
34
from matplotlib import pyplot as plt
45
from matplotlib import colors
@@ -11,7 +12,7 @@
1112

1213
def KMeans():
1314
'''二维数据聚类过程演示'''
14-
print u'聚类过程展示...\n'
15+
print(u'聚类过程展示...\n')
1516
data = spio.loadmat("data.mat")
1617
X = data['X']
1718
K = 3 # 总类数
@@ -21,7 +22,7 @@ def KMeans():
2122
'''
2223
图片压缩
2324
'''
24-
print u'K-Means压缩图片\n'
25+
print(u'K-Means压缩图片\n')
2526
img_data = misc.imread("bird.png") # 读取图片像素数据
2627
img_data = img_data/255.0 # 像素值映射到0-1
2728
img_size = img_data.shape
@@ -31,21 +32,21 @@ def KMeans():
3132
max_iters = 5
3233
initial_centroids = kMeansInitCentroids(X,K)
3334
centroids,idx = runKMeans(X, initial_centroids, max_iters, False)
34-
print u'\nK-Means运行结束\n'
35-
print u'\n压缩图片...\n'
35+
print(u'\nK-Means运行结束\n')
36+
print(u'\n压缩图片...\n')
3637
idx = findClosestCentroids(X, centroids)
3738
X_recovered = centroids[idx,:]
3839
X_recovered = X_recovered.reshape(img_size[0],img_size[1],3)
3940

40-
print u'绘制图片...\n'
41+
print(u'绘制图片...\n')
4142
plt.subplot(1,2,1)
4243
plt.imshow(img_data)
4344
plt.title(u"原先图片",fontproperties=font)
4445
plt.subplot(1,2,2)
4546
plt.imshow(X_recovered)
4647
plt.title(u"压缩图像",fontproperties=font)
4748
plt.show()
48-
print u'运行结束!'
49+
print(u'运行结束!')
4950

5051

5152
# 找到每条数据距离哪个类中心最近
@@ -86,7 +87,7 @@ def runKMeans(X,initial_centroids,max_iters,plot_process):
8687
idx = np.zeros((m,1)) # 每条数据属于哪个类
8788

8889
for i in range(max_iters): # 迭代次数
89-
print u'迭代计算次数:%d'%(i+1)
90+
print(u'迭代计算次数:%d'%(i+1))
9091
idx = findClosestCentroids(X, centroids)
9192
if plot_process: # 如果绘制图像
9293
plt = plotProcessKMeans(X,centroids,previous_centroids) # 画聚类中心的移动过程

LinearRegression/LinearRegression.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
#-*- coding: utf-8 -*-
2+
from __future__ import print_function
23
import numpy as np
34
from matplotlib import pyplot as plt
45
from matplotlib.font_manager import FontProperties
56
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14) # 解决windows环境下画图汉字乱码问题
67

78

89
def linearRegression(alpha=0.01,num_iters=400):
9-
print u"加载数据...\n"
10+
print(u"加载数据...\n")
1011

1112
data = loadtxtAndcsv_data("data.txt",",",np.float64) #读取数据
1213
X = data[:,0:-1] # X对应0到倒数第2列
@@ -19,7 +20,7 @@ def linearRegression(alpha=0.01,num_iters=400):
1920

2021
X = np.hstack((np.ones((m,1)),X)) # 在X前加一列1
2122

22-
print u"\n执行梯度下降算法....\n"
23+
print(u"\n执行梯度下降算法....\n")
2324

2425
theta = np.zeros((col,1))
2526
y = y.reshape(-1,1) #将行向量转化为列
@@ -73,7 +74,7 @@ def gradientDescent(X,y,theta,alpha,num_iters):
7374
temp[:,i] = theta - ((alpha/m)*(np.dot(np.transpose(X),h-y))) #梯度的计算
7475
theta = temp[:,i]
7576
J_history[i] = computerCost(X,y,theta) #调用计算代价函数
76-
print '.',
77+
print('.', end=' ')
7778
return theta,J_history
7879

7980
# 计算代价函数

LinearRegression/LinearRegression_scikit-learn.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#-*- coding: utf-8 -*-
2+
from __future__ import print_function
23
import numpy as np
34
from sklearn import linear_model
45
from sklearn.preprocessing import StandardScaler #引入归一化的包
56

67
def linearRegression():
7-
print u"加载数据...\n"
8+
print(u"加载数据...\n")
89
data = loadtxtAndcsv_data("data.txt",",",np.float64) #读取数据
910
X = np.array(data[:,0:-1],dtype=np.float64) # X对应0到倒数第2列
1011
y = np.array(data[:,-1],dtype=np.float64) # y对应最后一列
@@ -21,9 +22,9 @@ def linearRegression():
2122

2223
#预测结果
2324
result = model.predict(x_test)
24-
print model.coef_ # Coefficient of the features 决策函数中的特征系数
25-
print model.intercept_ # 又名bias偏置,若设置为False,则为0
26-
print result # 预测结果
25+
print(model.coef_) # Coefficient of the features 决策函数中的特征系数
26+
print(model.intercept_) # 又名bias偏置,若设置为False,则为0
27+
print(result) # 预测结果
2728

2829

2930
# 加载txt和csv文件

LogisticRegression/LogisticRegression.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#-*- coding: utf-8 -*-
2+
from __future__ import print_function
23
import numpy as np
34
import matplotlib.pyplot as plt
45
from scipy import optimize
@@ -19,7 +20,7 @@ def LogisticRegression():
1920

2021
J = costFunction(initial_theta,X,y,initial_lambda) #计算一下给定初始化的theta和lambda求出的代价J
2122

22-
print J #输出一下计算的值,应该为0.693147
23+
print(J) #输出一下计算的值,应该为0.693147
2324
#result = optimize.fmin(costFunction, initial_theta, args=(X,y,initial_lambda)) #直接使用最小化的方法,效果不好
2425
'''调用scipy中的优化算法fmin_bfgs(拟牛顿法Broyden-Fletcher-Goldfarb-Shanno)
2526
- costFunction是自己实现的一个求代价的函数,
@@ -29,7 +30,7 @@ def LogisticRegression():
2930
'''
3031
result = optimize.fmin_bfgs(costFunction, initial_theta, fprime=gradient, args=(X,y,initial_lambda))
3132
p = predict(X, result) #预测
32-
print u'在训练集上的准确度为%f%%'%np.mean(np.float64(p==y)*100) # 与真实值比较,p==y返回True,转化为float
33+
print(u'在训练集上的准确度为%f%%'%np.mean(np.float64(p==y)*100)) # 与真实值比较,p==y返回True,转化为float
3334

3435
X = data[:,0:-1]
3536
y = data[:,-1]

LogisticRegression/LogisticRegression_OneVsAll.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#-*- coding: utf-8 -*-
2+
from __future__ import print_function
23
import numpy as np
34
import matplotlib.pyplot as plt
45
import scipy.io as spio
@@ -27,7 +28,7 @@ def logisticRegression_OneVsAll():
2728
#res = np.hstack((p,y.reshape(-1,1)))
2829
#np.savetxt("predict.csv", res, delimiter=',')
2930

30-
print u"预测准确度为:%f%%"%np.mean(np.float64(p == y.reshape(-1,1))*100)
31+
print(u"预测准确度为:%f%%"%np.mean(np.float64(p == y.reshape(-1,1))*100))
3132

3233
# 加载mat文件
3334
def loadmat_data(fileName):

LogisticRegression/LogisticRegression_OneVsAll_scikit-learn.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#-*- coding: utf-8 -*-
2+
from __future__ import print_function
23
from scipy import io as spio
34
import numpy as np
45
from sklearn import svm
@@ -17,7 +18,7 @@ def logisticRegression_oneVsAll():
1718

1819
predict = model.predict(X) #预测
1920

20-
print u"预测准确度为:%f%%"%np.mean(np.float64(predict == y)*100)
21+
print(u"预测准确度为:%f%%"%np.mean(np.float64(predict == y)*100))
2122

2223
# 加载mat文件
2324
def loadmat_data(fileName):

LogisticRegression/LogisticRegression_scikit-learn.py

+16-17
Original file line numberDiff line numberDiff line change
@@ -4,40 +4,39 @@
44
import numpy as np
55

66
def logisticRegression():
7-
data = loadtxtAndcsv_data("data1.txt", ",", np.float64)
7+
data = loadtxtAndcsv_data("data1.txt", ",", np.float64)
88
X = data[:,0:-1]
99
y = data[:,-1]
10-
11-
# 划分为训练集和测试集
10+
11+
# 划分为训练集和测试集
1212
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2)
13-
14-
# 归一化
13+
14+
# 归一化
1515
scaler = StandardScaler()
1616
scaler.fit(x_train)
1717
x_train = scaler.fit_transform(x_train)
1818
x_test = scaler.fit_transform(x_test)
19-
20-
#逻辑回归
19+
20+
#逻辑回归
2121
model = LogisticRegression()
2222
model.fit(x_train,y_train)
23-
24-
# 预测
23+
24+
# 预测
2525
predict = model.predict(x_test)
2626
right = sum(predict == y_test)
27-
28-
predict = np.hstack((predict.reshape(-1,1),y_test.reshape(-1,1))) # 将预测值和真实值放在一块,好观察
29-
print predict
30-
print ('测试集准确率:%f%%'%(right*100.0/predict.shape[0])) #计算在测试集上的准确度
3127

32-
# 加载txt和csv文件
28+
predict = np.hstack((predict.reshape(-1,1),y_test.reshape(-1,1))) # 将预测值和真实值放在一块,好观察
29+
print(predict)
30+
print('测试集准确率:%f%%'%(right*100.0/predict.shape[0])) #计算在测试集上的准确度
31+
32+
# 加载txt和csv文件
3333
def loadtxtAndcsv_data(fileName,split,dataType):
3434
return np.loadtxt(fileName,delimiter=split,dtype=dataType)
3535

36-
# 加载npy文件
36+
# 加载npy文件
3737
def loadnpy_data(fileName):
3838
return np.load(fileName)
3939

4040

41-
4241
if __name__ == "__main__":
43-
logisticRegression()
42+
logisticRegression()

0 commit comments

Comments
 (0)