题目
 
采用SVM方法实现鸢尾花(Iris)数据集分类
 
数据集
 
iris.names是关于数据集的属性说明;
 
iris.data是实际的数据集,它包含三类样本,每类50条,共150条数据。
 

 
要求
 
训练集:选取Iris数据集中80%的数据,即120个数据,每类含有40个数据。
 
测试集:采用除训练集外的30个数据。
 
具体SVM方法:可根据实际情况自由选择。
 
评价指标:选取分类相关的评价指标来衡量分类结果。
 
# -*- coding: utf-8 -*- #
"""
@Project    :NIR-Mathematical-Modeling-Tool 
@File       :main.py 
@Author     :ZAY
@Time       :2023/6/4 15:44
@Annotation : " "
"""
import os
import torch
import sklearn
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score,auc,roc_curve,precision_recall_curve,f1_score, precision_score, recall_score
from Exp.Exp3.Plot import plotShow
# Restrict CUDA to GPU index 0. This is set before the torch.cuda query below,
# which is the order the two statements rely on.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Pick CUDA when torch reports it as available, otherwise fall back to the CPU.
# NOTE(review): `device` is not referenced anywhere else in the visible code —
# confirm whether it is still needed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Map the textual species label to an integer class id.
def Iris_label(s):
    """Convert an Iris species name into its integer class id.

    ``np.loadtxt`` passes ``bytes`` to converters under the legacy
    ``encoding='bytes'`` behaviour and ``str`` otherwise, so both spellings
    are accepted here.

    Args:
        s: Species name as ``str`` or ``bytes``. Surrounding whitespace
            (e.g. a trailing newline) is ignored.

    Returns:
        0 for Iris-setosa, 1 for Iris-versicolor, 2 for Iris-virginica.

    Raises:
        KeyError: If the name is not one of the three known species.
    """
    if isinstance(s, (bytes, bytearray)):
        # latin-1 never fails to decode, so an unknown label still surfaces
        # as KeyError rather than UnicodeDecodeError.
        s = bytes(s).decode('latin-1')
    if isinstance(s, str):
        s = s.strip()
    it = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    return it[s]
if __name__ == "__main__":
    # Imported explicitly: a bare `import sklearn` does not guarantee that the
    # `sklearn.model_selection` submodule is reachable as an attribute.
    from sklearn.model_selection import train_test_split

    # Column 4 holds the species name; Iris_label turns it into a class id so
    # the whole table can be loaded as floats.
    data = np.loadtxt("./Data/iris.data", dtype = float, delimiter = ',', converters = {4: Iris_label})
    data_x, label_y = np.split(data, indices_or_sections = (4,), axis = 1)  # x: features, y: labels
    # Keep only the first two feature columns so the decision regions can be
    # drawn on a 2-D plane by plotShow.
    data_x = data_x[:, 0:2]
    # Stratify on the class label so every class contributes the same share to
    # the training set (80%) and the test set (20%), as the task requires.
    train_data, test_data, train_label, test_label = train_test_split(data_x, label_y,
                                                                      random_state = 1,
                                                                      train_size = 0.8,
                                                                      test_size = 0.2,
                                                                      stratify = label_y.ravel())
    # Train the SVM classifier.
    classifier = svm.SVC(C = 2, kernel = 'rbf', gamma = 10, decision_function_shape = 'ovr')
    classifier.fit(train_data, train_label.ravel())
    train_label_pre = classifier.predict(train_data)
    test_label_pre = classifier.predict(test_data)
    print('训练集:', accuracy_score(train_label, train_label_pre))
    print('测试集:', accuracy_score(test_label, test_label_pre))
    # Inspect the decision-function values of the training samples. Each
    # quantity is computed once and reused instead of re-running the model.
    train_decision_function = classifier.decision_function(train_data)
    print('train_decision_function:', train_decision_function)
    print('predict_result:', train_label_pre)
    plotShow(test_data, test_label, data_x, label_y, classifier)
 
 Plot.py(定义 plotShow 函数,由 main.py 通过 from Exp.Exp3.Plot import plotShow 导入)
 
# -*- coding: utf-8 -*- #
"""
@Project    :NIR-Mathematical-Modeling-Tool 
@File       :plot.py 
@Author     :ZAY
@Time       :2023/6/5 21:41
@Annotation : " "
"""
# 确定坐标轴范围
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
def plotShow(test_data, test_label, data_x, label_y, classifier):
    """Plot the classifier's decision regions over the two-feature samples.

    The predicted class is painted over a 200 x 200 grid spanning the range of
    ``data_x``; all samples are scattered on top, and the test samples are
    drawn again with a black edge. The figure is saved to
    ``./Result/iris-cla.png`` and then shown.

    Args:
        test_data: Test features; columns 0 and 1 are plotted.
        test_label: Test labels; column 0 is used for colouring.
        data_x: All features; columns 0 and 1 define the axes and grid range.
        label_y: All labels; column 0 is used for colouring.
        classifier: Fitted model whose ``predict`` accepts an (n, 2) array.
            Labels are expected to be the class ids 0, 1, 2 matching the
            three colours below.

    Returns:
        None.
    """
    import os  # local import: this module has no file-level `import os`

    x1_min, x1_max = data_x[:, 0].min(), data_x[:, 0].max()  # range of feature 0
    x2_min, x2_max = data_x[:, 1].min(), data_x[:, 1].max()  # range of feature 1
    x1, x2 = np.mgrid[x1_min:x1_max:200j, x2_min:x2_max:200j]  # grid sampling points
    grid_test = np.stack((x1.flat, x2.flat), axis = 1)  # grid points as (n, 2) inputs
    # Default font that can render the Chinese axis labels and title.
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']
    # Light colours for the regions, dark colours for the sample points.
    cm_light = matplotlib.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
    cm_dark = matplotlib.colors.ListedColormap(['g', 'r', 'b'])
    # Pin the colour scale to the class ids. Without vmin/vmax each call
    # normalises to its own data range, so an array that lacks one class
    # (e.g. a test set without class 2) would be drawn with shifted colours.
    color_range = {'vmin': 0, 'vmax': cm_light.N - 1}
    grid_hat = classifier.predict(grid_test)  # predicted class per grid point
    grid_hat = grid_hat.reshape(x1.shape)  # back to the grid's shape
    plt.pcolormesh(x1, x2, grid_hat, cmap = cm_light, **color_range)  # decision regions
    plt.scatter(data_x[:, 0], data_x[:, 1], c = label_y[:, 0], s = 30, cmap = cm_dark,
                **color_range)  # all samples
    plt.scatter(test_data[:, 0], test_data[:, 1], c = test_label[:, 0], s = 30, edgecolors = 'k', zorder = 2,
                cmap = cm_dark, **color_range)  # outline the test samples
    plt.xlabel('花萼长度', fontsize = 13)
    plt.ylabel('花萼宽度', fontsize = 13)
    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)
    plt.title('鸢尾花SVM二特征分类')
    out_path = './Result/iris-cla.png'
    # savefig does not create missing directories; make sure the target exists.
    os.makedirs(os.path.dirname(out_path), exist_ok = True)
    plt.savefig(out_path)
    plt.show()