原理参考:
Python手写实现朴素贝叶斯(从原理到代码)
朴素贝叶斯算法的Python实现(多项式、高斯)
三种常用的朴素贝叶斯实现算法——高斯朴素贝叶斯、伯努利朴素贝叶斯、多项式朴素贝叶斯
- 伯努利模型
- 多项式模型
- 高斯模型
python实现:
import numpy as np
class BernoulliNB(object):
    """Bernoulli naive Bayes for binary (0/1) features.

    Layout note: ``fit`` expects ``x`` with shape (n_features, n_samples),
    while ``predict`` expects sample-major rows (n_samples, n_features).
    """

    def __init__(self, alpha=1.0, fit_prior=True):
        self.alpha = alpha              # Laplace smoothing strength
        self.fit_prior = fit_prior      # learn class priors from data; False -> uniform prior
        self.class_prior = None         # {class: prior prob}; if preset it is not re-estimated
        self.classes = None
        self.conditional_prob = None
        self.predict_prob = None

    def fit(self, x, y):
        # --- class priors P(y) ---
        self.classes = np.unique(y)
        if self.class_prior is None:  # prior not supplied by the caller
            class_num = len(self.classes)
            self.class_prior = {}
            if not self.fit_prior:
                for d in self.classes:
                    self.class_prior[d] = 1.0 / class_num
            else:
                for d in self.classes:
                    c_num = np.sum(np.equal(y, d))
                    # smoothed prior: (count + alpha) / (N + K*alpha)
                    self.class_prior[d] = (c_num + self.alpha) / (len(y) + class_num * self.alpha)
        print(self.class_prior)
        # --- conditional probabilities P(x_i = c | y), Bernoulli model ---
        # Keys are "feature|value|class". The feature index must be part of
        # the key: without it (original bug) later features overwrite earlier
        # ones and predict() reuses the same probability for every feature.
        self.conditional_prob = {}
        x_class = [0, 1]
        y = list(y)
        for yy in self.class_prior.keys():
            y_index = [i for i, label in enumerate(y) if label == yy]  # samples of class yy
            for i in range(len(x)):
                for c in x_class:
                    x_index = [j for j, feature in enumerate(x[i]) if feature == c]
                    xy_count = len(set(x_index) & set(y_index))
                    pkey = str(i) + '|' + str(c) + '|' + str(yy)
                    # Laplace smoothing over the 2 possible values, scaled by
                    # alpha (original hard-coded the denominator term as 2)
                    self.conditional_prob[pkey] = (xy_count + self.alpha) / (len(y_index) + 2 * self.alpha)
        print(self.conditional_prob)
        return self

    def predict(self, x):
        """Return the arg-max class label for each sample-major row of x."""
        labels = []
        for i in range(x.shape[0]):
            self.predict_prob = {}
            for j in self.classes:
                self.predict_prob[j] = self.class_prior[j]
                for k, d in enumerate(x[i]):
                    self.predict_prob[j] *= self.conditional_prob[str(k) + '|' + str(d) + '|' + str(j)]
            print(self.predict_prob)
            label = max(self.predict_prob, key=self.predict_prob.get)
            labels.append(label)
        return labels
if __name__ == '__main__':
    # x is feature-major: 2 binary features x 15 samples
    x = np.array([[1,1,1,1,1,0,0,0,0,0,1,1,1,1,1],[0,1,1,0,0,0,1,1,1,1,1,1,1,1,1]])
    y = np.array([-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1])
    bnb = BernoulliNB(alpha=1.0, fit_prior=True)
    bnb.fit(x, y)
    # predict takes sample-major rows
    print('预测值为:', bnb.predict(np.array([[0,0]])))
python调包:
import numpy as np
from sklearn.naive_bayes import BernoulliNB

if __name__ == '__main__':
    # sklearn expects sample-major input, hence the transpose
    x = np.array([[1,1,1,1,1,0,0,0,0,0,1,1,1,1,1],[0,1,1,0,0,0,1,1,1,1,1,1,1,1,1]]).T
    y = np.array([-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1])
    bnb = BernoulliNB()
    bnb.fit(x, y)
    print('预测值为:', bnb.predict(np.array([[0,0]])))
C++实现:
#include <iostream>
#include <vector>
#include <map>
#include <set>
#include <string>
#include <cmath>
#include <algorithm>

// Bernoulli naive Bayes for binary (0/1) features.
// Layout note: fit() takes x as (n_features x n_samples) while predict()
// takes sample-major rows (n_samples x n_features).
class BernoulliNB
{
public:
    BernoulliNB(float alpha, bool fit_prior = true) : m_alpha(alpha), m_fit_prior(fit_prior) {}

    void fit(const std::vector<std::vector<float>>& x, const std::vector<float>& y)
    {
        for (auto v : y)
            m_classes.insert(v);
        if (m_class_prior.empty())  // prior not supplied by the caller
        {
            const int class_num = static_cast<int>(m_classes.size());
            if (!m_fit_prior)
            {
                // uniform prior
                for (auto d : m_classes)
                    m_class_prior[d] = 1.0f / class_num;
            }
            else
            {
                // smoothed prior: (count + alpha) / (N + K*alpha)
                for (auto d : m_classes)
                {
                    int c_num = 0;
                    for (auto v : y) c_num += (d == v ? 1 : 0);
                    m_class_prior[d] = (c_num + m_alpha) / (y.size() + class_num * m_alpha);
                }
            }
        }
        const std::vector<float> x_class = { 0.0f, 1.0f };
        for (const auto& pa : m_class_prior)
        {
            std::vector<int> y_index;  // sample indices belonging to class pa.first
            for (size_t s = 0; s < y.size(); s++)
                if (y[s] == pa.first) y_index.push_back(static_cast<int>(s));
            for (size_t i = 0; i < x.size(); i++)  // per feature
            {
                for (auto c : x_class)
                {
                    // count samples of this class whose feature i equals c
                    // (direct count replaces the original sort + set_intersection)
                    size_t xy_count = 0;
                    for (int s : y_index)
                        if (x[i][s] == c) ++xy_count;
                    // Key is "feature|value|class". The feature index must be
                    // part of the key: without it (original bug) later features
                    // overwrite earlier ones.
                    const std::string pkey = std::to_string(i) + "|" + std::to_string(c) + "|" + std::to_string(pa.first);
                    // Laplace smoothing over the 2 possible values, scaled by
                    // alpha (original hard-coded the denominator term as 2)
                    m_conditional_prob[pkey] = (xy_count + m_alpha) / (y_index.size() + 2 * m_alpha);
                }
            }
        }
        for (const auto& pa : m_conditional_prob)
            std::cout << pa.first << " " << pa.second << std::endl;
    }

    // Return the arg-max class label for each sample-major row of x.
    std::vector<float> predict(const std::vector<std::vector<float>>& x)
    {
        std::vector<float> labels;
        for (size_t i = 0; i < x.size(); i++)
        {
            m_predict_prob.clear();
            for (auto j : m_classes)
            {
                m_predict_prob[j] = m_class_prior[j];
                for (size_t k = 0; k < x[i].size(); ++k)
                {
                    // same "feature|value|class" key format as fit()
                    const std::string tkey = std::to_string(k) + "|" + std::to_string(x[i][k]) + "|" + std::to_string(j);
                    m_predict_prob[j] *= m_conditional_prob[tkey];
                }
            }
            // arg-max over the posterior scores
            float best_label = 0.0f;
            float best_prob = -1.0f;
            for (const auto& pa : m_predict_prob)
            {
                if (pa.second > best_prob)
                {
                    best_prob = pa.second;
                    best_label = pa.first;
                }
            }
            labels.push_back(best_label);
        }
        return labels;
    }

private:
    float m_alpha;                                  // Laplace smoothing strength
    bool m_fit_prior;                               // learn priors from data; false -> uniform
    std::map<float, float> m_class_prior;           // class -> prior probability
    std::set<float> m_classes;                      // distinct class labels
    std::map<std::string, float> m_conditional_prob;// "feature|value|class" -> P(x_i=v|y)
    std::map<float, float> m_predict_prob;          // scratch: class -> posterior score
};
// Demo driver: train the Bernoulli model and classify one sample.
int main(int argc, char* argv[])
{
    // x is feature-major: 2 binary features x 15 samples
    std::vector<std::vector<float>> x = { { 1,1,1,1,1,0,0,0,0,0,1,1,1,1,1 },{ 0,1,1,0,0,0,1,1,1,1,1,1,1,1,1 } };
    std::vector<float> y = { -1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1 };
    BernoulliNB bnb(1.0f, true);
    bnb.fit(x, y);
    // predict takes sample-major rows
    std::cout << "预测值为:" << bnb.predict({ { 0, 0 } })[0] << std::endl;
    // system("pause") removed: Windows-only and blocks scripted runs
    return 0;
}
多项式模型
python实现:
import numpy as np
class MultinomialNB(object):
    """Multinomial naive Bayes over discrete feature values.

    Layout note: ``fit`` expects ``x`` with shape (n_features, n_samples),
    while ``predict`` expects sample-major rows (n_samples, n_features).
    """

    def __init__(self, alpha=1.0, fit_prior=True):
        self.alpha = alpha              # Laplace smoothing strength
        self.fit_prior = fit_prior      # learn class priors from data; False -> uniform prior
        self.class_prior = None         # {class: prior prob}; if preset it is not re-estimated
        self.classes = None
        self.conditional_prob = None
        self.predict_prob = None

    def fit(self, x, y):
        # --- class priors P(y) ---
        self.classes = np.unique(y)
        if self.class_prior is None:  # prior not supplied by the caller
            class_num = len(self.classes)
            self.class_prior = {}
            if not self.fit_prior:
                for d in self.classes:
                    self.class_prior[d] = 1.0 / class_num
            else:
                for d in self.classes:
                    c_num = np.sum(np.equal(y, d))
                    # smoothed prior: (count + alpha) / (N + K*alpha)
                    self.class_prior[d] = (c_num + self.alpha) / (len(y) + class_num * self.alpha)
        print(self.class_prior)
        # --- conditional probabilities P(x_i = c | y), multinomial model ---
        # Keys are "feature|value|class". The feature index must be part of
        # the key: without it (original bug) later features overwrite earlier
        # ones and predict() reuses the same probability for every feature.
        self.conditional_prob = {}
        y = list(y)
        for yy in self.class_prior.keys():
            y_index = [i for i, label in enumerate(y) if label == yy]  # samples of class yy
            for i in range(len(x)):
                x_class = np.unique(x[i])   # distinct values taken by feature i
                for c in list(x_class):
                    x_index = [j for j, feature in enumerate(x[i]) if feature == c]
                    xy_count = len(set(x_index) & set(y_index))
                    pkey = str(i) + '|' + str(c) + '|' + str(yy)
                    # smoothing scaled by alpha (original hard-coded alpha=1
                    # in the denominator by adding the raw value count)
                    self.conditional_prob[pkey] = (xy_count + self.alpha) / (len(y_index) + x_class.shape[0] * self.alpha)
        print(self.conditional_prob)
        return self

    def predict(self, x):
        """Return the arg-max class label for each sample-major row of x.

        Raises KeyError for feature values never seen during fit.
        """
        labels = []
        for i in range(x.shape[0]):
            self.predict_prob = {}
            for j in self.classes:
                self.predict_prob[j] = self.class_prior[j]
                for k, d in enumerate(x[i]):
                    self.predict_prob[j] *= self.conditional_prob[str(k) + '|' + str(d) + '|' + str(j)]
            print(self.predict_prob)
            label = max(self.predict_prob, key=self.predict_prob.get)
            labels.append(label)
        return labels
if __name__ == '__main__':
    # x is feature-major: 2 discrete features x 15 samples
    x = np.array([[1,1,1,1,1,2,2,2,2,2,3,3,3,3,3],[1,2,2,1,1,1,2,2,3,3,3,2,2,3,3]])
    y = np.array([-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1])
    mnb = MultinomialNB(alpha=1.0, fit_prior=True)
    mnb.fit(x, y)
    # predict takes sample-major rows
    print('预测值为:', mnb.predict(np.array([[2,1]])))
python调包:
import numpy as np
from sklearn.naive_bayes import MultinomialNB

if __name__ == '__main__':
    # sklearn expects sample-major input, hence the transpose
    x = np.array([[1,1,1,1,1,2,2,2,2,2,3,3,3,3,3],[1,2,2,1,1,1,2,2,3,3,3,2,2,3,3]]).T
    y = np.array([-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1])
    mnb = MultinomialNB()
    mnb.fit(x, y)
    print('预测值为:', mnb.predict(np.array([[2,1]])))
C++实现:
#include <iostream>
#include <vector>
#include <map>
#include <set>
#include <string>
#include <cmath>
#include <algorithm>

// Multinomial naive Bayes over discrete feature values.
// Layout note: fit() takes x as (n_features x n_samples) while predict()
// takes sample-major rows (n_samples x n_features).
class MultinomialNB
{
public:
    MultinomialNB(float alpha, bool fit_prior = true) : m_alpha(alpha), m_fit_prior(fit_prior) {}

    void fit(const std::vector<std::vector<float>>& x, const std::vector<float>& y)
    {
        for (auto v : y)
            m_classes.insert(v);
        if (m_class_prior.empty())  // prior not supplied by the caller
        {
            const int class_num = static_cast<int>(m_classes.size());
            if (!m_fit_prior)
            {
                // uniform prior
                for (auto d : m_classes)
                    m_class_prior[d] = 1.0f / class_num;
            }
            else
            {
                // smoothed prior: (count + alpha) / (N + K*alpha)
                for (auto d : m_classes)
                {
                    int c_num = 0;
                    for (auto v : y) c_num += (d == v ? 1 : 0);
                    m_class_prior[d] = (c_num + m_alpha) / (y.size() + class_num * m_alpha);
                }
            }
        }
        for (const auto& pa : m_class_prior)
        {
            std::vector<int> y_index;  // sample indices belonging to class pa.first
            for (size_t s = 0; s < y.size(); s++)
                if (y[s] == pa.first) y_index.push_back(static_cast<int>(s));
            for (size_t i = 0; i < x.size(); i++)  // per feature
            {
                const std::set<float> x_class(x[i].begin(), x[i].end());  // distinct values
                for (auto c : x_class)
                {
                    // count samples of this class whose feature i equals c
                    // (direct count replaces the original sort + set_intersection)
                    size_t xy_count = 0;
                    for (int s : y_index)
                        if (x[i][s] == c) ++xy_count;
                    // Key is "feature|value|class". The feature index must be
                    // part of the key: without it (original bug) later features
                    // overwrite earlier ones.
                    const std::string pkey = std::to_string(i) + "|" + std::to_string(c) + "|" + std::to_string(pa.first);
                    // smoothing scaled by alpha (original added the raw value count)
                    m_conditional_prob[pkey] = (xy_count + m_alpha) / (y_index.size() + x_class.size() * m_alpha);
                }
            }
        }
        for (const auto& pa : m_conditional_prob)
            std::cout << pa.first << " " << pa.second << std::endl;
    }

    // Return the arg-max class label for each sample-major row of x.
    std::vector<float> predict(const std::vector<std::vector<float>>& x)
    {
        std::vector<float> labels;
        for (size_t i = 0; i < x.size(); i++)
        {
            m_predict_prob.clear();
            for (auto j : m_classes)
            {
                m_predict_prob[j] = m_class_prior[j];
                for (size_t k = 0; k < x[i].size(); ++k)
                {
                    // same "feature|value|class" key format as fit()
                    const std::string tkey = std::to_string(k) + "|" + std::to_string(x[i][k]) + "|" + std::to_string(j);
                    m_predict_prob[j] *= m_conditional_prob[tkey];
                }
            }
            // arg-max over the posterior scores
            float best_label = 0.0f;
            float best_prob = -1.0f;
            for (const auto& pa : m_predict_prob)
            {
                if (pa.second > best_prob)
                {
                    best_prob = pa.second;
                    best_label = pa.first;
                }
            }
            labels.push_back(best_label);
        }
        return labels;
    }

private:
    float m_alpha;                                  // Laplace smoothing strength
    bool m_fit_prior;                               // learn priors from data; false -> uniform
    std::map<float, float> m_class_prior;           // class -> prior probability
    std::set<float> m_classes;                      // distinct class labels
    std::map<std::string, float> m_conditional_prob;// "feature|value|class" -> P(x_i=v|y)
    std::map<float, float> m_predict_prob;          // scratch: class -> posterior score
};
// Demo driver: train the multinomial model and classify one sample.
int main(int argc, char* argv[])
{
    // x is feature-major: 2 discrete features x 15 samples
    std::vector<std::vector<float>> x = { { 1,1,1,1,1,2,2,2,2,2,3,3,3,3,3 },{ 1,2,2,1,1,1,2,2,3,3,3,2,2,3,3 } };
    std::vector<float> y = { -1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1 };
    MultinomialNB mnb(1.0f, true);
    mnb.fit(x, y);
    // predict takes sample-major rows
    std::cout << "预测值为:" << mnb.predict({ { 2, 1 } })[0] << std::endl;
    // system("pause") removed: Windows-only and blocks scripted runs
    return 0;
}
高斯模型
python实现:
import numpy as np
class GaussionNB(object):
    """Gaussian naive Bayes: per-class, per-feature normal densities.

    Layout note: ``fit`` expects ``x`` with shape (n_features, n_samples),
    while ``predict`` expects sample-major rows (n_samples, n_features).
    """

    def __init__(self, fit_prior=True):
        self.fit_prior = fit_prior      # learn class priors from data; False -> uniform prior
        self.class_prior = None         # {class: prior prob}; if preset it is not re-estimated
        self.classes = None
        self.mean = None                # {"feature|class": sample mean}
        self.var = None                 # {"feature|class": population variance (ddof=0)}
        self.predict_prob = None

    def fit(self, x, y):
        # --- class priors P(y) ---
        self.classes = np.unique(y)
        if self.class_prior is None:  # prior not supplied by the caller
            class_num = len(self.classes)
            self.class_prior = {}
            if not self.fit_prior:
                for d in self.classes:
                    self.class_prior[d] = 1.0 / class_num
            else:
                for d in self.classes:
                    c_num = np.sum(np.equal(y, d))
                    self.class_prior[d] = c_num / len(y)  # maximum-likelihood prior
        print(self.class_prior)
        # --- per-class, per-feature mean and variance ---
        self.mean = {}
        self.var = {}
        y = list(y)
        for yy in self.class_prior.keys():
            y_index = [i for i, label in enumerate(y) if label == yy]  # samples of class yy
            for i in range(len(x)):
                x_class = [x[i][j] for j in y_index]  # feature i restricted to class yy
                pkey = str(i) + '|' + str(yy)
                self.mean[pkey] = np.mean(x_class)
                self.var[pkey] = np.var(x_class)
        print(self.mean, self.var)
        return self

    def _calculat_prob_gaussion(self, mu, sigma, x):
        # normal pdf; ``sigma`` is the STANDARD DEVIATION, not the variance
        return 1.0 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu) ** 2 / (2 * sigma ** 2))

    def predict(self, x):
        """Return the arg-max class label for each sample-major row of x."""
        labels = []
        for i in range(x.shape[0]):
            self.predict_prob = {}
            for yy in self.class_prior.keys():
                self.predict_prob[yy] = self.class_prior[yy]
                for c, d in enumerate(list(x[i])):
                    tkey = str(c) + '|' + str(yy)
                    mu = self.mean[tkey]
                    # The stored value is a variance; take the square root so
                    # the pdf receives a standard deviation (the original
                    # passed the variance directly, a math bug). The epsilon
                    # guards against zero variance.
                    sigma = np.sqrt(self.var[tkey]) + 1e-9
                    self.predict_prob[yy] = self.predict_prob[yy] * self._calculat_prob_gaussion(mu, sigma, d)
            print(self.predict_prob)
            label = max(self.predict_prob, key=self.predict_prob.get)
            labels.append(label)
        return labels
if __name__ == '__main__':
    # x is feature-major: 2 features x 15 samples
    x = np.array([[1,1,1,1,1,2,2,2,2,2,3,3,3,3,3],[1,2,2,1,1,1,2,2,3,3,3,2,2,3,3]])
    y = np.array([-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1])
    gnb = GaussionNB()
    gnb.fit(x, y)
    # predict takes sample-major rows
    print('预测值为:', gnb.predict(np.array([[2,1]])))
python调包:
import numpy as np
from sklearn.naive_bayes import GaussianNB

if __name__ == '__main__':
    # sklearn expects sample-major input, hence the transpose
    x = np.array([[1,1,1,1,1,2,2,2,2,2,3,3,3,3,3],[1,2,2,1,1,1,2,2,3,3,3,2,2,3,3]]).T
    y = np.array([-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1])
    gnb = GaussianNB()
    gnb.fit(x, y)
    print('预测值为:', gnb.predict(np.array([[2,1]])))
C++实现:
#include <iostream>
#include <vector>
#include <map>
#include <set>
#include <string>
#include <cmath>
#include <algorithm>

// pi as a typed constant instead of an object-like macro
constexpr double PI = 3.14159265358979323846;

// Gaussian naive Bayes: per-class, per-feature normal densities.
// Layout note: fit() takes x as (n_features x n_samples) while predict()
// takes sample-major rows (n_samples x n_features).
class GaussionNB
{
public:
    GaussionNB(bool fit_prior = true) : m_fit_prior(fit_prior) {}

    void fit(const std::vector<std::vector<float>>& x, const std::vector<float>& y)
    {
        for (auto v : y)
            m_classes.insert(v);
        if (m_class_prior.empty())  // prior not supplied by the caller
        {
            const int class_num = static_cast<int>(m_classes.size());
            if (!m_fit_prior)
            {
                // uniform prior
                for (auto d : m_classes)
                    m_class_prior[d] = 1.0f / class_num;
            }
            else
            {
                // maximum-likelihood prior: count / N
                for (auto d : m_classes)
                {
                    float c_num = 0;
                    for (auto v : y) c_num += (d == v ? 1 : 0);
                    m_class_prior[d] = c_num / y.size();
                }
            }
        }
        for (const auto& pa : m_class_prior)
        {
            std::vector<int> y_index;  // sample indices belonging to class pa.first
            for (size_t s = 0; s < y.size(); s++)
                if (y[s] == pa.first) y_index.push_back(static_cast<int>(s));
            for (size_t i = 0; i < x.size(); i++)  // per feature
            {
                // feature i restricted to this class
                std::vector<float> x_class;
                x_class.reserve(y_index.size());
                for (int j : y_index)
                    x_class.push_back(x[i][j]);
                // Mean and variance computed once per (feature, class). The
                // original recomputed both inside the per-sample loop (O(n^2));
                // only the final overwrite was correct.
                float mean_val = 0.0f;
                for (float v : x_class) mean_val += v;
                mean_val /= x_class.size();
                float var_val = 0.0f;
                for (float v : x_class) var_val += (v - mean_val) * (v - mean_val);
                var_val /= x_class.size();  // population variance (ddof=0)
                const std::string pkey = std::to_string(i) + "|" + std::to_string(pa.first);
                m_mean[pkey] = mean_val;
                m_var[pkey] = var_val;
            }
        }
    }

    // Normal pdf; sigma is the STANDARD DEVIATION, not the variance.
    float _calculat_prob_gaussion(float mu, float sigma, float x)
    {
        const double z = static_cast<double>(x) - mu;
        return static_cast<float>(1.0 / (sigma * std::sqrt(2.0 * PI)) * std::exp(-z * z / (2.0 * sigma * sigma)));
    }

    // Return the arg-max class label for each sample-major row of x.
    std::vector<float> predict(const std::vector<std::vector<float>>& x)
    {
        std::vector<float> labels;
        for (size_t i = 0; i < x.size(); i++)
        {
            m_predict_prob.clear();
            for (const auto& pa : m_class_prior)
            {
                m_predict_prob[pa.first] = pa.second;
                for (size_t j = 0; j < x[i].size(); ++j)
                {
                    const std::string tkey = std::to_string(j) + "|" + std::to_string(pa.first);
                    const float mu = m_mean[tkey];
                    // The stored value is a variance; take the square root so
                    // the pdf receives a standard deviation (the original
                    // passed the variance directly, a math bug). The epsilon
                    // guards against zero variance.
                    const float sigma = std::sqrt(m_var[tkey]) + 1e-9f;
                    m_predict_prob[pa.first] *= _calculat_prob_gaussion(mu, sigma, x[i][j]);
                }
            }
            // arg-max over the posterior scores
            float best_label = 0.0f;
            float best_prob = -1.0f;
            for (const auto& pa : m_predict_prob)
            {
                if (pa.second > best_prob)
                {
                    best_prob = pa.second;
                    best_label = pa.first;
                }
            }
            labels.push_back(best_label);
        }
        return labels;
    }

private:
    bool m_fit_prior;                       // learn priors from data; false -> uniform
    std::map<float, float> m_class_prior;   // class -> prior probability
    std::set<float> m_classes;              // distinct class labels
    std::map<std::string, float> m_mean;    // "feature|class" -> mean
    std::map<std::string, float> m_var;     // "feature|class" -> variance
    std::map<float, float> m_predict_prob;  // scratch: class -> posterior score
};
// Demo driver: train the Gaussian model and classify one sample.
int main(int argc, char* argv[])
{
    // x is feature-major: 2 features x 15 samples
    std::vector<std::vector<float>> x = { { 1,1,1,1,1,2,2,2,2,2,3,3,3,3,3 },{ 1,2,2,1,1,1,2,2,3,3,3,2,2,3,3 } };
    std::vector<float> y = { -1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1 };
    GaussionNB gnb;
    gnb.fit(x, y);
    // predict takes sample-major rows
    std::cout << "预测值为:" << gnb.predict({ { 2, 1 } })[0] << std::endl;
    // system("pause") removed: Windows-only and blocks scripted runs
    return 0;
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)