#!/usr/bin/env python2
# -*- coding:utf-8 -*-
"""Logistic regression trained with batch and stochastic gradient ascent.

The module loads a small two-feature data set, fits a weight vector with one
of three optimisers and plots the samples together with the fitted decision
boundary.
"""

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the training functions only need NumPy, so the
    # module stays importable on machines without matplotlib.
    plt = None


def loadDataSet(filename='testSet.txt'):
    """Load the samples and labels stored in a whitespace-separated file.

    Every non-blank line must hold two float features followed by an integer
    class label. A constant 1.0 is prepended to each sample so that the first
    weight acts as the intercept.

    Args:
        filename: path of the data file; defaults to 'testSet.txt'.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows and ``labelMat`` the list of integer labels.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError, IndexError: if a non-blank line is malformed.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even when a line fails to parse.
    with open(filename) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline at the end).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat


def sigmoid(intX):
    """Return the logistic function 1 / (1 + exp(-intX)), element-wise.

    The value is computed as ``exp(-log(1 + exp(-intX)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow
    ``np.exp`` (the naive formula emits a RuntimeWarning there).

    Args:
        intX: a scalar or NumPy array.

    Returns:
        The sigmoid of ``intX`` with the same shape as the input.
    """
    return np.exp(-np.logaddexp(0.0, -intX))


def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: 2-D array-like of shape (m, n), one sample per row.
        classLabels: sequence of m class labels (0 or 1).
        alpha: step size of each update.
        maxCycles: number of full passes over the data.

    Returns:
        A NumPy array of shape (n, 1) holding the fitted weights.
    """
    # Plain ndarrays are used instead of np.mat, which NumPy 2.0 removed.
    samples = np.asarray(dataMatIn, dtype=float)
    targets = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    weights = np.ones((samples.shape[1], 1))
    for _ in range(maxCycles):
        predictions = sigmoid(np.dot(samples, weights))
        error = targets - predictions
        # Gradient of the log-likelihood with respect to the weights.
        weights = weights + alpha * np.dot(samples.T, error)
    return weights


def plotBeastFit(weights):
    """Plot the default data set and the decision boundary given by weights.

    The samples are read with ``loadDataSet()``; class 1 is drawn as red
    squares, every other label as green dots. The line drawn is the set of
    points where ``w0 + w1 * x1 + w2 * x2 == 0``.

    Args:
        weights: three weights, either of shape (3,) or (3, 1).

    Raises:
        ImportError: if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError('matplotlib is required to plot the decision boundary')

    # Accept both the (n, 1) result of gradAscent and the (n,) result of the
    # stochastic variants.
    coef = np.asarray(weights, dtype=float).ravel()

    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat, dtype=float).reshape(-1, 3)
    isPositive = np.array(labelMat, dtype=int) == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[isPositive, 1], dataArr[isPositive, 2],
               s=20, c='red', marker='s', alpha=0.5)
    ax.scatter(dataArr[~isPositive, 1], dataArr[~isPositive, 2],
               s=20, c='green', alpha=0.5)

    x = np.arange(-3.0, 3.0, 0.1)
    y = (-coef[0] - coef[1] * x) / coef[2]
    ax.plot(x, y)
    plt.title('BestFit')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


def stocGradAscent0(dataMatrix, classLabels, alpha=0.01):
    """Fit weights with one sequential pass of stochastic gradient ascent.

    The weights are updated once per sample, in the order the samples are
    given.

    Args:
        dataMatrix: 2-D array-like of shape (m, n), one sample per row.
        classLabels: sequence of m class labels (0 or 1).
        alpha: fixed step size of each update.

    Returns:
        A NumPy array of shape (n,) holding the fitted weights.
    """
    samples = np.asarray(dataMatrix, dtype=float)
    m, n = samples.shape
    weights = np.ones(n)
    for i in range(m):
        h = sigmoid(np.dot(samples[i], weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * samples[i]
    return weights


def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Fit weights with randomised stochastic gradient ascent.

    Each of the ``numIter`` epochs visits every sample exactly once in a
    fresh random order, and the step size shrinks as training progresses.

    Args:
        dataMatrix: 2-D array-like of shape (m, n), one sample per row.
        classLabels: sequence of m class labels (0 or 1).
        numIter: number of passes (epochs) over the data.

    Returns:
        A NumPy array of shape (n,) holding the fitted weights.
    """
    samples = np.asarray(dataMatrix, dtype=float)
    m, n = samples.shape
    weights = np.ones(n)
    for j in range(numIter):
        # A new permutation per epoch gives sampling without replacement, so
        # no sample is skipped or repeated within an epoch.
        for i, sampleIdx in enumerate(np.random.permutation(m)):
            # The step size decays with the epoch and the position inside
            # it; the constant keeps it from ever reaching zero.
            alpha = 4 / (1.0 + j + i) + 0.01
            h = sigmoid(np.dot(samples[sampleIdx], weights))
            error = classLabels[sampleIdx] - h
            weights = weights + alpha * error * samples[sampleIdx]
    return weights


if __name__ == '__main__':
    dataMat, labelMat = loadDataSet()
    # gradAscent(dataMat, labelMat) and stocGradAscent0(dataMat, labelMat)
    # are drop-in alternatives to the optimiser used below.
    weights = stocGradAscent1(np.array(dataMat), labelMat)
    plotBeastFit(weights)