# Comparison of StratifiedShuffleSplit and StratifiedKFold
#
# Aspect                 | StratifiedShuffleSplit             | StratifiedKFold
# -----------------------+------------------------------------+---------------------------
# Train/test partition   | set via test_size or train_size    | set via the number of
#                        |                                    | folds (cv / n_splits)
# Supports shuffle?      | yes                                | yes
# Difference             | every group's training (or test)   | the training (or test)
#                        | set has a consistent class         | set keeps the same class
#                        | distribution, but not necessarily  | distribution as the
#                        | the same as the original data      | original data

from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
import numpy as np

# 50 samples with 2 features each: the same 10-row pattern repeated 5 times.
X = np.array([
    [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [3, 4], [3, 4],
    [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [3, 4], [3, 4],
    [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [3, 4], [3, 4],
    [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [3, 4], [3, 4],
    [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4], [3, 4], [3, 4],
])

# Binary labels: 30 zeros and 20 ones (6 zeros / 4 ones per 10-sample pattern).
y = np.array([
    0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
    0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
    0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
    0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
    0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
])

# Five independent shuffled splits. The size argument was truncated to `tra`
# in the original notes; it is restored as train_size=0.7 because the recorded
# output below has 35 training and 15 test labels per split (a 70/30 split).
ss = StratifiedShuffleSplit(n_splits=5, train_size=0.7, random_state=0)
for train_index, test_index in ss.split(X, y):
    # Labels belonging to the training and the test side of this split.
    y_train, y_test = y[train_index], y[test_index]
    print(y_train, y_test)
    print("\n")  # blank separator between splits

# Output recorded in the original notes (exact values may differ between
# scikit-learn versions):
# [1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1] [1 0 0 0 0 0 0 1 1 0 1 1 0 1 0]
# [0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0] [0 1 1 1 1 1 0 1 0 0 0 0 0 0 0]
# [1 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 1 0 0 0 0 0 0 1 1] [0 0 1 0 1 0 0 0 0 1 0 1 1 1 0]
# [1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0] [1 0 0 1 0 0 0 0 0 1 1 0 1 0 1]
# [1 0 1 1 0 1 0 1 0 0 0 1 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0] [0 1 1 0 0 1 0 0 0 0 1 0 1 0 1]
# StratifiedKFold demo: split the 50 samples into 5 shuffled folds and print
# the labels on each side of every fold. No random_state is passed, so the
# fold assignment is expected to change from run to run.
sk = StratifiedKFold(n_splits=5, shuffle=True)
for train_idx, valid_idx in sk.split(X, y):
    # Labels belonging to the training and the held-out side of this fold.
    y_train, y_test = y[train_idx], y[valid_idx]
    print(y_train, y_test)
    print("\n")  # blank separator between folds

# Output recorded in the original notes (40 training / 10 held-out labels per
# fold; the exact values vary because the shuffle is unseeded):
# [0 0 0 0 1 1 0 1 0 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 1 0 1 1] [0 1 0 0 0 1 0 1 1 0]
# [0 0 0 1 1 0 0 1 1 0 0 0 0 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 0 0 1] [0 1 0 0 1 1 0 0 0 1]
# [0 0 0 0 1 1 0 1 1 0 0 0 0 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 1 1 0 0 1 1] [0 0 1 1 0 0 0 1 1 0]
# [0 0 1 0 0 1 0 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 1 1] [0 0 1 1 0 1 0 0 1 0]
# [0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1] [0 1 0 1 0 0 0 0 1 1]