深圳创新网站建设,聊城网站建设招聘,网站优化应该怎么做,宁波网站建设营销推广公众号#xff1a;尤而小屋编辑#xff1a;Peter作者#xff1a;Peter 大家好#xff0c;我是Peter~
继续更新机器学习扩展包MLxtend的文章。本文介绍如何使用MLxtend来绘制与分类模型相关的决策边界decision_regions。
导入库
导入相关用于数据处理和建模的库#xff… 公众号尤而小屋编辑Peter作者Peter 大家好我是Peter~
继续更新机器学习扩展包MLxtend的文章。本文介绍如何使用MLxtend来绘制与分类模型相关的决策边界decision_regions。
导入库
导入相关用于数据处理和建模的库
# Libraries used for data handling, plotting and modelling throughout the article.
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib import cm
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign displays correctly

import itertools

from sklearn import datasets
from sklearn.linear_model import LogisticRegression  # logistic-regression classifier
from sklearn.svm import SVC  # support vector classifier
from sklearn.ensemble import RandomForestClassifier  # random-forest classifier
from mlxtend.classifier import EnsembleVoteClassifier  # ensemble voting classifier from mlxtend
from mlxtend.data import iris_data  # built-in dataset
from mlxtend.plotting import plot_decision_regions  # draws decision regions

import warnings
warnings.filterwarnings('ignore')

# === 1维决策边界 (Decision regions in 1D) ===
X, y = iris_data()
X[:3]  # feature names: [sepal length, sepal width, petal length, petal width]
# Output:
# array([[5.1, 3.5, 1.4, 0.2],
#        [4.9, 3. , 1.4, 0.2],
#        [4.7, 3.2, 1.3, 0.2]])

X = X[:, 2]  # keep only the feature at index 2 (petal length)
# X = X[:, None]  # alternative way to obtain a 2-D array (same below)
X = X.reshape(-1, 1)
X[:5]
# Output:
# array([[1.4],
#        [1.4],
#        [1.3],
#        [1.5],
#        [1.4]])

# --- 建立模型 (Build the model) ---
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)

# --- 绘制决策边界图形 (Plot the decision regions) ---
plot_decision_regions(X, y, clf=svm, legend=2)

# Column 2 of the iris data is petal length, so label the axis accordingly.
plt.xlabel('petal length')
plt.title('SVM on Iris Datasets based on 1D')
plt.show()

# === 2维决策边界 (Decision regions in 2D) ===
X, y = iris_data()
X = X[:, :2]  # use the first two features for modelling and visualisation
X[:10]
# 输出结果为 (Output):
# array([[5.1, 3.5],
#        [4.9, 3. ],
#        [4.7, 3.2],
#        [4.6, 3.1],
#        [5. , 3.6],
#        [5.4, 3.9],
#        [4.6, 3.4],
#        [5. , 3.4],
#        [4.4, 2.9],
#        [4.9, 3.1]])

# --- 建立模型 (Build the model) ---
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)

# --- 绘制决策边界图形 (Plot the decision regions) ---
plot_decision_regions(X, y, clf=svm, legend=2)

plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.title('SVM on Iris Datasets based on 2D')
plt.show()

# === 多模型决策边界 (Decision Region Grids) ===
# Import the four classifiers.
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

import numpy as np

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign displays correctly

import itertools

from mlxtend.data import iris_data  # built-in dataset
from mlxtend.plotting import plot_decision_regions

# --- 4个模型的初始化 (Initialise the four models) ---
clf1 = LogisticRegression(random_state=1, solver='newton-cg', multi_class='multinomial')
clf2 = RandomForestClassifier(random_state=1, n_estimators=100)
clf3 = GaussianNB()
clf4 = SVC(gamma='auto')

# --- 导入数据集 (Load the dataset) ---
X, y = iris_data()
X = X[:, :2]  # use two features for modelling

# --- 4个模型的迭代训练与可视化 (Fit and visualise each model in turn) ---
gs = gridspec.GridSpec(2, 2)  # 2x2 grid layout

fig = plt.figure(figsize=(10, 8))

labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']

# itertools.product yields the grid cells (0,0), (0,1), (1,0), (1,1).
for clf, lab, grd in zip([clf1, clf2, clf3, clf4],
                         labels,
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    fig = plot_decision_regions(X=X, y=y, clf=clf, legend=2)
    plt.title(lab)

plt.show()

# === 高亮测试数据集 (Highlighting test data) ===
from mlxtend.plotting import plot_decision_regions
from mlxtend.data import iris_data  # built-in dataset
from mlxtend.plotting import plot_decision_regions

import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign displays correctly

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# --- 导入数据集并切分 (Load and split the dataset) ---
X, y = iris_data()
X = X[:, :2]  # use the first two features for modelling

# Split into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

# --- 模型训练 (Train the model) ---
svm = SVC(C=0.5, kernel='linear')
svm.fit(X_train, y_train)

# X_highlight marks the held-out test samples on the plot.
plot_decision_regions(X, y, clf=svm, legend=2, X_highlight=X_test)

plt.xlabel('sepal length')
# The second selected column (X[:, :2]) is sepal width, not petal length.
plt.ylabel('sepal width')
plt.title('SVM on Iris with Highlighting Test Data Points')
plt.show()

# === 评估分类器在非线性问题的表现 (Evaluating Classifier Behavior on Non-Linear Problems) ===
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import itertools
from mlxtend.plotting import plot_decision_regions
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Define the four models.
clf1 = LogisticRegression(random_state=1, solver='lbfgs')
clf2 = RandomForestClassifier(n_estimators=100, random_state=1)
clf3 = GaussianNB()
clf4 = SVC(gamma='auto')

# --- XOR问题 (The XOR problem) ---
X = np.random.randn(300, 2)  # 300x2 array drawn from a standard normal distribution
X[:5]
# Output:
# array([[-1.96399101, -0.13610581],
#        [-1.4832503 , -0.01927823],
#        [-2.32101114,  0.09310347],
#        [ 1.85377755,  0.08739847],
#        [-1.26535948,  0.75706403]])

# np.logical_xor computes the element-wise logical XOR of two boolean arrays:
# False where the two inputs agree, True where they differ.
y = np.array(np.logical_xor(X[:, 0] > 0, X[:, 1] > 0),  # XOR of "feature > 0" for the two features
             dtype=int)

y[:10]  # 0 means False, 1 means True
# Output:
# array([0, 0, 1, 0, 1, 1, 1, 0, 1, 0])

gs = gridspec.GridSpec(2, 2)  # 2x2 grid layout

fig = plt.figure(figsize=(10, 8))  # figure size
labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']  # model names

# clf: model, lab: its name, grd: grid cell (0,0), (0,1), (1,0), (1,1)
for clf, lab, grd in zip([clf1, clf2, clf3, clf4],
                         labels,
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)  # fit the model
    ax = plt.subplot(gs[grd[0], grd[1]])  # grd[0] is the row, grd[1] the column
    fig = plot_decision_regions(X=X, y=y, clf=clf, legend=2)  # draw the decision regions
    plt.title(lab)  # model name

plt.show()

# === 半月数据集的分类 (Half-Moons) ===
make_moons是Scikit-learn库中的一个函数用于生成具有两个弯月形状的数据集。它通常用于测试分类算法在非线性可分数据上的性能。
该函数的基本用法如下
from sklearn.datasets import make_moonsX, y make_moons(n_samples100, noise0.1, random_state42)其中n_samples参数指定生成的数据点数量noise参数指定数据的噪声水平0表示无噪声越大表示噪声越多random_state参数用于设置随机数生成器的种子以确保结果的可重复性。
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=100, random_state=123)  # generate the half-moon dataset

gs = gridspec.GridSpec(2, 2)

fig = plt.figure(figsize=(10, 8))

labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
# Re-fit the four classifiers defined earlier (clf1..clf4) on the new data, one grid cell each.
for clf, lab, grd in zip([clf1, clf2, clf3, clf4],
                         labels,
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    fig = plot_decision_regions(X=X, y=y, clf=clf, legend=2)
    plt.title(lab)

plt.show()

# === 同心圆数据的分类 (Concentric Circles) ===
from sklearn.datasets import make_circles  # generates the concentric-circles dataset
X, y = make_circles(n_samples=1000, random_state=123, noise=0.1, factor=0.2)

gs = gridspec.GridSpec(2, 2)

fig = plt.figure(figsize=(10, 8))

labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
# Re-fit the four classifiers defined earlier (clf1..clf4) on the new data, one grid cell each.
for clf, lab, grd in zip([clf1, clf2, clf3, clf4],
                         labels,
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    fig = plot_decision_regions(X=X, y=y, clf=clf, legend=2)
    plt.title(lab)

plt.show()

# === 基于子图的分类决策边界 (Decision regions on subplots) ===
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from mlxtend.data import iris_data  # built-in dataset

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets
import numpy as np

X, y = iris_data()
X = X[:, 2]
X = np.array(X).reshape(-1, 1)  # single feature as a column vector

# --- 建立两个模型并训练 (Build and train two models) ---
clf1 = LogisticRegression(random_state=1, solver='lbfgs', multi_class='multinomial')

clf2 = GaussianNB()
clf1.fit(X, y)
clf2.fit(X, y)

# --- 创建图形对象fig和ax绘图对象 (Create the figure and axes objects) ---
fig, axes = plt.subplots(1, 2, figsize=(10, 3))  # 1x2 grid of subplots

# Pass ax= so each model is drawn on its own subplot.
fig = plot_decision_regions(X=X, y=y, clf=clf1, ax=axes[0], legend=2)
fig = plot_decision_regions(X=X, y=y, clf=clf2, ax=axes[1], legend=1)

plt.show()

# === 基于多特征的决策边界 (Decision regions with more than two features) ===
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

X, y = datasets.make_blobs(
    n_samples=600,  # number of samples
    n_features=3,  # number of features
    centers=[[2, 2, -2], [-2, -2, 2]],  # cluster centres
    cluster_std=[2, 2],  # cluster standard deviations
    random_state=2  # random seed
)

# --- 建立SVM模型并训练 (Build and train the SVM model) ---
svm = SVC(gamma='auto')
svm.fit(X, y)

fig, ax = plt.subplots()

value = 1.5
width = 0.75

plot_decision_regions(
    X, y, clf=svm,
    # Filler values must be provided when X has more than 2 training features.
    filler_feature_values={2: value},
    filler_feature_ranges={2: width},
    legend=2,
    ax=ax
)

ax.set_xlabel('Feature1')
ax.set_ylabel('Feature2')
ax.set_title('Feature3={}'.format(value))

fig.suptitle('SVM on make_blobs')

plt.show()

# === 决策边界的网格切片 (Grid slices of the decision regions) ===
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC

X, y = datasets.make_blobs(
    n_samples=600,  # number of samples
    n_features=3,  # number of features
    centers=[[2, 2, -2], [-2, -2, 2]],  # cluster centres
    cluster_std=[2, 2],  # cluster standard deviations
    random_state=2  # random seed
)

# Train the model.
svm = SVC(gamma='auto')
svm.fit(X, y)

fig, axarr = plt.subplots(2, 2, figsize=(10, 8), sharex=True, sharey=True)
values = [-4.0, -1.0, 1.0, 4.0]
width = 0.75

# One subplot per fixed value of the third feature (index 2).
for value, ax in zip(values, axarr.flat):
    plot_decision_regions(
        X, y, clf=svm,
        filler_feature_values={2: value},
        filler_feature_ranges={2: width},
        legend=2,
        ax=ax
    )
    ax.set_xlabel('Feature1')
    ax.set_ylabel('Feature2')
    ax.set_title('Feature3={}'.format(value))

fig.suptitle('SVM on make_blobs')
plt.show()

# === 自定义绘图风格 (Customising the plotting style) ===
from mlxtend.plotting import plot_decision_regions
from mlxtend.data import iris_data  # built-in dataset
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign displays correctly
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Load and split the data.
X, y = iris_data()
X = X[:, :2]  # use the first two features for modelling
# Split into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

# --- 建立模型和训练 (Build and train the model) ---
svm = SVC(C=0.5, kernel='linear')
svm.fit(X_train, y_train)

# --- 自定义绘图风格 (Custom plotting style) ---
scatter_kwargs = {'s': 120, 'edgecolor': None, 'alpha': 0.7}
contourf_kwargs = {'alpha': 0.2}
scatter_highlight_kwargs = {'s': 120, 'label': 'Test data', 'alpha': 0.7}

# Draw the decision regions.
plot_decision_regions(
    X, y, clf=svm, legend=2,
    X_highlight=X_test,  # samples to highlight
    scatter_kwargs=scatter_kwargs,
    contourf_kwargs=contourf_kwargs,
    scatter_highlight_kwargs=scatter_highlight_kwargs
)

# Add axis labels.
plt.xlabel('sepal length')
# The second selected column (X[:, :2]) is sepal width, not petal length.
plt.ylabel('sepal width')
plt.title('SVM on Iris')
plt.show()

# === 自定义图例 (Customising the legend) ===
from mlxtend.plotting import plot_decision_regions
from mlxtend.data import iris_data  # built-in dataset
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign displays correctly
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Load and split the data.
X, y = iris_data()
X = X[:, :2]  # use the first two features for modelling
# Split into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)
svm = SVC(C=0.5, kernel='linear')
svm.fit(X_train, y_train)

# --- 修改图例 (Modify the legend) ---
# legend=0 is passed so the legend can be built by hand below.
ax = plot_decision_regions(X, y, clf=svm, legend=0)

plt.xlabel('sepal length')
# The second selected column (X[:, :2]) is sepal width, not petal length.
plt.ylabel('sepal width')
plt.title('SVM on Iris')

# Custom legend: reuse the plotted handles with new label text.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles,
          ['class square', 'class triangle', 'class circle'],
          framealpha=0.3, scatterpoints=1)

plt.show()

# === 基于缩放因子的决策边界可视化 (Zoom factors) ===
from mlxtend.plotting import plot_decision_regions
from mlxtend.data import iris_data  # built-in dataset
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign displays correctly
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

X, y = iris_data()
X = X[:, :2]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)
svm = SVC(C=0.5, kernel='linear')
svm.fit(X_train, y_train)

# --- 1、默认的缩放因子 zoom_factor=1.0 (Default zoom factor) ---
plot_decision_regions(X, y, clf=svm, zoom_factor=1.)
plt.show()

# --- 2、使用不同的缩放因子 (Using different zoom factors) ---
plot_decision_regions(X, y, clf=svm, zoom_factor=0.1)
plt.show()

plot_decision_regions(X, y, clf=svm, zoom_factor=2)
plt.xlim(5, 6)
plt.ylim(2, 5)
plt.show()

# === 使用Onehot编码输出的分类器 (Classifiers with onehot-encoded outputs, Keras) ===
定义了一个名为Onehot2Int的类该类用于将模型预测的one-hot编码结果转换为整数
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(123)

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize
from mlxtend.plotting import plot_decision_regions


class Onehot2Int(object):
    """Wrap a model so that ``predict`` returns integer class indices.

    The wrapped model's ``predict`` output is reduced with ``np.argmax``
    along axis 1, giving one integer per row.
    """

    def __init__(self, model):
        # model: the model whose predictions need to be converted
        self.model = model

    def predict(self, X):
        # X: the input samples, forwarded unchanged to the wrapped model
        y_pred = self.model.predict(X)
        # Index of the largest value in each row, i.e. the position of the
        # 1 in a one-hot encoding; returned as an array of those indices.
        return np.argmax(y_pred, axis=1)


# --- 数据预处理 (Data preprocessing) ---
X, y = iris_data()
X = X[:, [2, 3]]

X = standardize(X)  # standardise the features
y_onehot = to_categorical(y)  # one-hot encode the labels

# --- 建立网络模型 (Build the network) ---
model = Sequential()
model.add(Dense(8,
                input_shape=(2,),
                activation='relu',
                kernel_initializer='he_uniform'))

model.add(Dense(4,
                activation='relu',
                kernel_initializer='he_uniform'))

model.add(Dense(3, activation='softmax'))

# --- 模型编译和训练 (Compile and train the model) ---
# `learning_rate` replaces the older `lr` keyword, which current Keras rejects.
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(learning_rate=0.005),
              metrics=['accuracy'])

history = model.fit(X, y_onehot,
                    epochs=10,
                    batch_size=5,
                    verbose=1,
                    validation_split=0.1)
# Output:
# Epoch 1/10
# 27/27 [==============================] - 0s 7ms/step - loss: 0.9506 - accuracy: 0.6074 - val_loss: 1.0899 - val_accuracy: 0.0000e+00
# Epoch 2/10
# 27/27 [==============================] - 0s 2ms/step - loss: 0.7453 - accuracy: 0.6963 - val_loss: 1.0886 - val_accuracy: 0.0000e+00
# Epoch 3/10
# 27/27 [==============================] - 0s 1ms/step - loss: 0.6098 - accuracy: 0.7185 - val_loss: 1.0572 - val_accuracy: 0.0000e+00
# Epoch 4/10
# 27/27 [==============================] - 0s 2ms/step - loss: 0.5159 - accuracy: 0.7333 - val_loss: 1.0118 - val_accuracy: 0.0000e+00
# Epoch 5/10
# 27/27 [==============================] - 0s 1ms/step - loss: 0.4379 - accuracy: 0.7630 - val_loss: 0.9585 - val_accuracy: 0.8000
# Epoch 6/10
# 27/27 [==============================] - 0s 2ms/step - loss: 0.3784 - accuracy: 0.8815 - val_loss: 0.8806 - val_accuracy: 0.9333
# Epoch 7/10
# 27/27 [==============================] - 0s 1ms/step - loss: 0.3378 - accuracy: 0.9407 - val_loss: 0.8155 - val_accuracy: 1.0000
# Epoch 8/10
# 27/27 [==============================] - 0s 2ms/step - loss: 0.3130 - accuracy: 0.9481 - val_loss: 0.7535 - val_accuracy: 1.0000
# Epoch 9/10
# 27/27 [==============================] - 0s 2ms/step - loss: 0.2893 - accuracy: 0.9259 - val_loss: 0.6859 - val_accuracy: 1.0000
# Epoch 10/10
# 27/27 [==============================] - 0s 2ms/step - loss: 0.2695 - accuracy: 0.9481 - val_loss: 0.6258 - val_accuracy: 1.0000

model_no_ohe = Onehot2Int(model)  # wrap the model so predictions come back as integer labels

# Draw the decision regions.
plot_decision_regions(X, y, clf=model_no_ohe)
plt.show()
# Output:
# 9600/9600 [==============================] - 5s 555us/step