三种方式生成数据
方式1
#调用模块 from sklearn.datasets import load_iris data = load_iris() #导入数据和标签 data_X = data.data data_y = data.target
方式2
from sklearn import datasets loaded_data = datasets.load_iris() # 导入数据集的属性 #导入样本数据 data_X = loaded_data.data # 导入标签 data_y = loaded_data.target
方式3
# 直接返回 data_X, data_y = load_iris(return_X_y=True)
数据集使用汇总
from sklearn import datasets # 导入库 boston = datasets.load_boston() # 导入波士顿房价数据 print(boston.keys()) # 查看键(属性) ['data','target','feature_names','DESCR', 'filename'] print(boston.data.shape,boston.target.shape) # 查看数据的形状 print(boston.feature_names) # 查看有哪些特征 print(boston.DESCR) # described 数据集描述信息 print(boston.filename) # 文件路径
数据切分
# 导入模块 from sklearn.model_selection import train_test_split # 划分为训练集和测试集数据 X_train, X_test, y_train, y_test = train_test_split( data_X, data_y, test_size=0.2, random_state=111 ) # 150*0.8=120 len(X_train)
数据标准化和归一化
from sklearn.preprocessing import StandardScaler # 标准化 from sklearn.preprocessing import MinMaxScaler # 归一化 # 标准化 ss = StandardScaler() X_scaled = ss.fit_transform(X_train) # 传入待标准化的数据 # 归一化 mm = MinMaxScaler() X_scaled = mm.fit_transform(X_train)
类型编码
对数字编码
对字符串编码
原创文章,作者:朋远方,如若转载,请注明出处:https://caovan.com/06-scikit-learnjiaocheng/.html