公司做网站该注意哪些,广州新塘网站建设推广公司,免费搜索引擎入口,电商培训班

训练集 测试集

如果拿所有原始数据来训练，存在的问题：模型很差无法调整；真实环境难以拿到真实 label；所以将数据区分为 训练数据 和 测试数据(train test split)；将训练数据来训练模型；然后用测试数据…

训练集 测试集

如果拿所有原始数据来训练，存在的问题：
- 模型很差无法调整；
- 真实环境难以拿到真实 label；

所以将数据区分为 训练数据 和 测试数据(train test split)；将训练数据来训练模型；然后用测试数据测试模型；使用这种方式也存在问题。

python 原生分离 iris 数据集

import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

iris = datasets.load_iris()
X = iris.data
y = iris.target

X
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       ...
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])

y
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

X.shape, y.shape  # ((150, 4), (150,))

# shuffle
shuffle_indexes = np.random.permutation(len(X))  # 0--len(X) 的随机排列
shuffle_indexes
# array([ 22, 4, 142, 24, 7, 146, ...
#         9, 95, 130, 29, 124])

test_ratio = 0.2
test_size = int(len(X) * test_ratio)
test_size  # 30

test_indexes = shuffle_indexes[:test_size]
train_indexes = shuffle_indexes[test_size:]

test_indexes
array([ 22, 4, 142, 24, 7, 146, 70, 77, 144, 14, 40, 119, 46, 85, 74, 87, 86, 60, 91, 120, 78, 45, 65, 105, 113, 39, 83, 80, 134, 16])

X_train = X[train_indexes]
y_train = y[train_indexes]
X_test = X[test_indexes]
y_test = y[test_indexes]

X_test.shape, X_train.shape  # ((30, 4), (120, 4))

封装 train_test_split 函数

def train_test_split(X, y, test_ratio=0.2, seed=None):
    assert X.shape[0] == y.shape[0], "the size of X must be equal to the size of y"
    assert 0.0 <= test_ratio <= 1.0, "test_ratio must be valid"

    if seed:
        np.random.seed(seed)

    shuffle_indexes = np.random.permutation(len(X))
    test_size = int(len(X) * test_ratio)
    test_indexes = shuffle_indexes[:test_size]
    train_indexes = shuffle_indexes[test_size:]

    X_train = X[train_indexes]
    y_train = y[train_indexes]
    X_test = X[test_indexes]
    y_test = y[test_indexes]

    return X_train, y_train, X_test, y_test

sklearn 中的 train_test_split

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y)

train_test_split(*arrays, **options)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)