#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022/1/1 13:49
# @Author : @linlianqin
# @Site :
# @File : naivyBates.py
# @Software: PyCharm
# @description:
"""Train, evaluate, persist and reload a Bernoulli naive Bayes classifier."""
import csv
import os

import joblib
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from dataProcess import loaddatasets
from paths import abs_path


# Data loading
def loadDataSets(xlspath):
    """Load features and labels and also return them merged into one table.

    Args:
        xlspath: Path handed unchanged to ``loaddatasets``.

    Returns:
        A tuple ``(dataSet, datas, labels)`` where ``datas`` and ``labels``
        are exactly what ``loaddatasets`` returned and ``dataSet`` is
        ``datas`` with the labels appended as the last column, cast to int.
    """
    datas, labels = loaddatasets(xlspath)
    # Turn the labels into a column so they can be stacked next to the
    # features (assumes labels is a 1-D numpy array -- TODO confirm against
    # dataProcess.loaddatasets).
    labels_ = labels.reshape(len(labels), 1)
    dataSet = np.hstack((datas, labels_)).astype(int)
    return dataSet, datas, labels


# Training
def train(traffic_feature, traffic_target):
    """Fit a standardise-then-BernoulliNB pipeline on a 90/10 split.

    The data is split before any statistics are computed, so the scaler only
    ever sees the training rows. The scaler is part of the returned model,
    which means the held-out features and any later input are passed in
    unscaled and get the same transformation as the training data.

    Args:
        traffic_feature: Feature matrix, one row per sample.
        traffic_target: Labels aligned with ``traffic_feature``.

    Returns:
        A tuple ``(model, feature_test, target_test)``: the fitted pipeline
        and the unscaled held-out features with their labels.
    """
    print('traffic_feature=', traffic_feature)
    print('traffic_target=', traffic_target)
    feature_train, feature_test, target_train, target_test = train_test_split(
        traffic_feature, traffic_target, test_size=0.1, random_state=0)
    # Bundling the scaler with the classifier keeps the preprocessing with
    # the model when it is saved and reloaded.
    model = make_pipeline(StandardScaler(), BernoulliNB())
    model.fit(feature_train, target_train)
    return model, feature_test, target_test


# Prediction
def predict(model, feature_test):
    """Return ``model.predict(feature_test)``."""
    predict_results = model.predict(feature_test)
    return predict_results  # e.g. [1,2,3]


# Evaluation
def evalue(model, predict_labels, true_labels):
    """Print and return accuracy, confusion matrix and classification report.

    Args:
        model: Not used; kept so existing callers keep working.
        predict_labels: Labels produced by the classifier.
        true_labels: Ground-truth labels.

    Returns:
        A tuple ``(acc, conf_mat, report)``.
    """
    # scikit-learn metrics take (y_true, y_pred) in that order.
    acc = accuracy_score(true_labels, predict_labels)
    print("准确率:", acc)
    conf_mat = confusion_matrix(true_labels, predict_labels)
    print("混淆矩阵:", conf_mat)
    report = classification_report(true_labels, predict_labels)
    print("模型分析报告:", report)
    return acc, conf_mat, report


# Save the model
def save_model(model, path):
    """Serialise ``model`` to ``path`` with joblib."""
    joblib.dump(model, path)


# Load the model
def load_model(path):
    """Load and return a model previously written by ``save_model``.

    NOTE(security): joblib.load unpickles the file, which can execute
    arbitrary code. Only load model files from a trusted source.
    """
    new_model = joblib.load(path)
    return new_model


def main():
    """Run the whole flow: load, train, evaluate, save, reload, predict."""
    print("加载数据集......")
    # Build paths with os.path.join rather than backslash literals: in the
    # old "\data\naivyBates_..." literal the "\n" was parsed as a newline.
    # (abs_path is presumably the project root directory -- verify in paths.)
    xlsPath = os.path.join(abs_path, "data", "min_datas.xlsx")
    _, datas, labels = loadDataSets(xlsPath)

    print("开始训练......")
    model, feature_test, target_test = train(datas, labels)

    print("测试模型,测试集")
    predict_labels = predict(model, feature_test)

    print("评估模型......")
    acc, conf_mat, report = evalue(model, predict_labels, target_test)

    print("保存模型")
    path = os.path.join(abs_path, "data", "naivyBates_%.2f.pkl" % acc)
    save_model(model, path)

    print("调用模型进行预测")
    # Note: the input must be two-dimensional (one row per sample).
    testVec = [[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]]
    new_model = load_model(path)
    predict_results = new_model.predict(testVec)
    print("待测数据:",testVec)
    print("预测结果:",predict_results)


if __name__ == '__main__':
    main()
# 欢迎分享,转载请注明来源:内存溢出
# 评论列表(0条)