Python异方差的检验与修正_随笔

Python异方差的检验与修正

最近导师正准备用Python讲计量经济学，我和我的同门们负责完成一部分的实验任务，整理了好几次，每一次都有一定的完善。

import numpy as np
import pandas as pd

#引入数据
df=pd.Dataframe(pd.read_excel("D:\jl-1数据.xlsx"))
print(df)

#绘制粮食产量和播种面积的散点图
import matplotlib.pyplot as plt
# y=np.array(df["production"])
# x=np.array(df["area"])
y=df["production"]
x=df["area"]
plt.figure(1)#因为要画多张图，将各张图分开
ax=plt.plot(x,y,"b.")
plt.ylabel("PRODUCTION")
plt.xlabel("AREA")#多行数据一起运行才能正确显示

#构建一元线性回归模型
import statsmodels.api as sm
X=sm.add_constant(x)#加常数项
model1=sm.OLS(y,X)#做OLS回归
fit=model1.fit()#把模型的结果赋值给fit
print(fit.summary())

前面这些部分我的同门之前已经讲解过了，我就直接略过了。后面是我整理的处理异方差的一些方法。

"""方法一 图示法"""
#在散点图上添加回归线
plt.plot(x,fit.fittedvalues,"r")#和上面的程序一起运行，才能显示在一张图里

res=fit.resid#从OLS回归模型中获取残差
#残差与解释变量的散点图
plt.figure(2)#与之前的图区分开，画在两张图里
ax1=plt.plot(y,res,"b.")
ax1=plt.axhline(y=0,color="r")#绘制一条y=0水平线（基准线），方便观察
#绘制残差平方序列与解释变量的散点图
res2=res**2#获得残差的平方
plt.figure(3)
ax2=plt.plot(x,res2,"b.")

"""方法二 GQ检验"""
gq=sm.stats.diagnostic.het_goldfeldquandt(y,model1.exog,1,12,7)#参数分别为因变量，自变量(ndarray,用model.exog即可)，自变量的列下标，即按照该自变量排序（从小到大）
#split=12,drop=7 The first sample is [0:split], the second sample is [split+drop:]
#model.exog(所有自变量和常数项的值)
print("gq=%.4f,P-value=%.4f"%(gq[0],gq[1]))

在最开始的时候，我并没有找到上面这个函数，所以自己也写了一段代码，不过有了更简便的方法，直接用最简便的方法就好了。（下面简单做个展示）

"""方法二 GQ检验"""
#按解释变量的大小做升序排列
df1=df.sort_values(by="area", ascending=True)#按指定的列(by=)排序,ascending=False为降序，True为升序
print(df1)
#去掉中间部分的值，分别选取前12组观测值和后12组观测值分别进行一元线性回归
#前12组一元线性回归
x1=np.array(df1.iloc[0:12,2])#提取前12组的数据
y1=np.array(df1.iloc[0:12,1])#第一列的前12组
X1=sm.add_constant(x1)
model = sm.OLS(y1,X1)
fit2=model.fit()
e1=fit2.resid
sum1=sum(e1*e1)#计算前12组的残差平方和，（）里面平方，外面加和
#后12组一元线性回归
x2=np.array(df1.iloc[19:31,2])#提取后12组的数据
y2=np.array(df1.iloc[19:31,1])#Python不包括最后一位，故为31
X2=sm.add_constant(x2)
model = sm.OLS(y2,X2)
fit3=model.fit()
e2=fit3.resid
sum2=sum(e2*e2)
#F检验
F=sum2/sum1
print("F=",F)

"""方法三 White检验"""
white=sm.stats.diagnostic.het_white(res,model1.exog)#参数分别为残差(内部用残差平方回归)，model.exog(内部用平方和交互作用项回归)
print("white=%.4f,P-value=%.4f"%(white[0],white[1]))#分别是拉格朗日乘数统计量，拉格朗日乘数的P值

"""方法三 White检验"""
#得到残差平方序列
res2=res**2
#构建辅助回归模型，并进行OLS估计
X = np.column_stack((x,x**2))#将两个矩阵按列合并
# print(x)
# print(x**2)
# print(X)
X = sm.add_constant(X)
white_model = sm.OLS(res2,X)#有缺点，变量过多无法一一对应，变量少的时候可以用，变量多的时候不建议
fit4=white_model.fit()
print(fit4.summary())
#求判定系数
white_R_2 = fit4.rsquared#在结果中提取判定系数
print(white_R_2)
#计算white统计量
white = fit4.nobs*white_R_2#在结果中提取N
print(white)

"""方法四 BP检验"""
bp=sm.stats.het_breuschpagan(res,model1.exog)#参数分别为残差(内部用残差平方回归)，model.exog
print("bp=%.4f,P-value=%.4f"%(bp[0],bp[1]))

格利瑟检验做了最常用的三种：

"""方法四 Glejser检验"""
#获得残差的绝对值
res3=abs(res)
#进行OLS估计
x=np.array(df["area"])#上面已经定义，有无均可，单独提出，更加明显
X = sm.add_constant(x)
model = sm.OLS(res3,X)
fit6=model.fit()
print(fit6.summary())

x7=x**2
X = sm.add_constant(x7)
model = sm.OLS(res3,X)
fit7=model.fit()
print(fit7.summary())

x8=np.sqrt(x)
X = sm.add_constant(x8)
model = sm.OLS(res3,X)
fit8=model.fit()
print(fit8.summary())

"""WLS"""
y=np.array(df["production"])#上面已经定义
x=np.array(df["area"])
wk=1/x#设置权重
X=sm.add_constant(x)
wls_model=sm.WLS(y,X,weights=wk)
fit9=wls_model.fit()
print(fit9.summary())

欢迎分享，转载请注明来源：内存溢出

原文地址: https://outofmemory.cn/zaji/5658843.html

Python异方差的检验与修正

发表评论

评论列表（0条）