Modifying code from binary classifier logistic regression to multi-class “one vs all” logistic regression
我是机器学习的新手,正在尝试练习不同的算法。目前,我使用 Logistic 回归对由 sklearn 生成的随机数据集进行分类。现在它是一个二元分类器,但我想改用多类 Logistic 回归的“一对多”(one-vs-all)方法(以便稍后进行比较)。
下面是我尝试实现的用于二元分类的代码:
"""Binary logistic regression (class 1 versus the rest) on synthetic blobs.

The model is trained with batch gradient descent and evaluated on a
validation split for an increasing number of epochs.
"""

import random

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import datasets
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

# Three 2-D clusters; `t` holds the cluster label of each sample.
X, t = make_blobs(n_samples=[400, 800, 400],
                  centers=[[0, 0], [1, 2], [2, 3]],
                  n_features=2, random_state=2019)

# Shuffle the sample indices reproducibly before splitting.
indices = np.arange(X.shape[0])
random.seed(2020)
random.shuffle(indices)
indices[:10]  # notebook-style peek; has no effect when run as a script

# First 800 shuffled samples for training, next 400 for validation,
# the remainder for testing.
X_train = X[indices[:800], :]
X_val = X[indices[800:1200], :]
X_test = X[indices[1200:], :]
t_train = t[indices[:800]]
t_val = t[indices[800:1200]]
t_test = t[indices[1200:]]

# Binarized targets: 1 where the original label is 1, 0 otherwise.
t2_train = t_train == 1
t2_train = t2_train.astype('int')
t2_val = (t_val == 1).astype('int')
t2_test = (t_test == 1).astype('int')


def add_bias(X):
    """Return `X` with a constant 1 prepended as the bias feature.

    A 1-D input gets a leading 1; a 2-D input gets a leading column of ones,
    so the bias always sits in position 0.
    """
    sh = X.shape
    if len(sh) == 1:
        # X is a single feature vector.
        return np.concatenate([np.array([1]), X])
    # X is a matrix with one sample per row.
    m = sh[0]
    bias = np.ones((m, 1))  # m x 1 column of ones
    return np.concatenate([bias, X], axis=1)


class NumpyClassifier():
    """Methods shared by all numpy-based classifiers."""

    def accuracy(self, X_val, t_val, **kwargs):
        """Return the fraction of samples in `X_val` predicted as `t_val`.

        Extra keyword arguments are forwarded to `self.predict`.
        """
        pred = self.predict(X_val, **kwargs)
        if len(pred.shape) > 1:
            # Keep only the first column of a 2-D prediction array.
            pred = pred[:, 0]
        return sum(pred == t_val) / len(pred)


def logistic(x):
    """Logistic (sigmoid) function, applied element-wise."""
    return 1 / (1 + np.exp(-x))


class NumpyLogReg(NumpyClassifier):
    """Binary logistic regression trained with batch gradient descent."""

    def fit(self, X_train, t_train, gamma=0.1, epochs=10):
        """Fit the weights `self.theta`, starting from zeros.

        X_train is an N x m matrix (N data points, m features), t_train holds
        the target values for the training data, gamma is the learning rate
        and epochs the number of full-batch update steps.
        """
        (k, m) = X_train.shape
        X_train = add_bias(X_train)

        # `theta` aliases `self.theta`, so the in-place update below is seen
        # by `self.forward` on the next iteration.
        self.theta = theta = np.zeros(m + 1)

        for _ in range(epochs):
            theta -= gamma / k * X_train.T @ (self.forward(X_train) - t_train)

    def forward(self, X_val):
        """Return the logistic of `X_val @ theta`; `X_val` must include the bias."""
        return logistic(X_val @ self.theta)

    def score(self, X_val):
        """Return the logistic output for `X_val` (bias is added here)."""
        z = add_bias(X_val)
        score = self.forward(z)
        return score

    def predict(self, X_val, threshold=0.5):
        """Return 1 where the logistic output exceeds `threshold`, else 0."""
        z = add_bias(X_val)
        score = self.forward(z)
        return (score > threshold).astype('int')


# Train and evaluate against the binarized targets (t2_*): the model can only
# emit 0/1 predictions, so the raw three-class labels in t_* are not valid
# targets for it.
lr_cl = NumpyLogReg()
lr_cl.fit(X_train, t2_train)
lr_cl.predict(X_val)
lr_cl.accuracy(X_val, t2_val)

# Validation accuracy as a function of the number of epochs.
for e in [1, 2, 5, 10, 50, 100, 1000, 10000, 100000, 1000000]:
    lr_cl = NumpyLogReg()
    lr_cl.fit(X_train, t2_train, epochs=e, gamma=0.00001)
    print("{:10} {:7.3f}".format(e, lr_cl.accuracy(X_val, t2_val)))
我需要一些建议/提示,说明如何将这段代码修改为多类“一对所有”(one-vs-all)/“一对其余”(one-vs-rest)逻辑回归。
我不想直接使用从 sklearn 导入的逻辑回归算法,而是想像这样从头实现。
非常感谢您提出的任何建议。
我认为
让我们假设数据集具有3个类