sklearn代码18 python自动化处理数据

sklearn代码18 python自动化处理数据,第1张

sklearn代码18 python自动化处理数据
import numpy as np

import pandas as pd

# Note the capital F: pandas exports ``DataFrame``, not ``Dataframe``.
from pandas import Series, DataFrame

import matplotlib.pyplot as plt
%matplotlib inline
data = pd.read_excel('./18级高一体测成绩汇总.xls')
data
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重01男高孜阳06114'138.88195121278517072.611男郝少杰10134'167.70225117313317452.721男郝梓烨06194'098.45218141390116946.531男何弘源10104'218.05206131494618379.741男刘硕鹏12123'447.52210139353817154.751男刘运硕03143'497.94190207397017566.461男吕晓瑶03143'547.75186117371017353.971男米孜聪06364'38.0619531557817883.181男聂浩然27194'017.752201510382117566.591男牛苗嘉12114'127.382451711442316753.9101男牛砚哲281347.822191311403117357.4111男齐子涵185x4'137.37228915435416354.6121男乔一甲06163'457.6620273223817961.1131男任晓波03113'467.6624537481117763.9141男戎小龙26330000000NaN151男桑淳熙06163'577.6019275414717459.2161男田晓龙24114'188.1421084424117961.9171男田玉聪27163'327.202552212532418363.4181男王晨宇06133'568.152071312436317360.5191男王家梁06303'478.152021316536417456201男王乐天33313'537.8521037344517756.9211男王一钊12133'577.8522092567017755.5221男王子天06343'427.232121215570918572.3231男王子鑫00124'37.68218153478017783.7241男未晓锟12144'148.30206151335817346.6251男张国瑞033x4'048.1520595349416948.3261男张皓天06324'047.55190125328616950.1271男张泽地03104'027.55240512448317158.4281男张智贤03183'577.89220911425416654.8291男赵博翰101x4'168.19212277349816968....................................46317男王亚楠26364'158.36217202545217583.446417男陈核涛26124'367.22267611555517962.246517男曹佳尧213X3'487.372251712551917662.246617男贾存生78123'587.372361211424616960.146717男杨辰阳06314'028210187403416756.846817男张雨康181X4'028196124573817266.546917男刘帅怡03194'388.0922321851681697847017男张世荣0326NaNNaNNaNNaNNaNNaNNaNNaN47117男刘泽阳181X4'28.37208218567717263.747217男王鹏鑫00144'267.89232218705218082.947317男贾耀杰10344'098.4620515742081716147417男刘艺通06193'497.662321110589717556.147517男段佳硕27374'367.772361120515817655.247617男刘鼎03154'378.27208171631117795.647717男张浩27343'448.27217157507517057.647817男庞慧谦07073'557.982122010556416854.547917男李垚泽06153'417.5722595559918174.848017男胡德皓36145'299.02210120671218395.948117男张博03164'117.512382114559017967.748217男张育森30114'567.4225291351591807048317男吴宜凯00153'547.96229149525418264.148417男左一萌1015NaNNaNNaNNaNNaNNaNNaNNaN48517男王鹏飞06154'48.0218081459218764.648617男张泽琼18153'547.5123813115572
17659.548717男张晓波061X4'588.76200129453316951.348817男张乔楠03114'238.27208100464717669.548917男郭泽森03335'199.5521015670421777649017男陈子龙061X3'257.5252131357551816549117男王丹龙06364'397.812081411568817251.749217男王玉涵0636NaNNaNNaNNaNNaNNaNNaNNaN

493 rows × 11 columns

data[:45]
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重01男高孜阳06114'138.88195121278517072.611男郝少杰10134'167.70225117313317452.721男郝梓烨06194'098.45218141390116946.531男何弘源10104'218.05206131494618379.741男刘硕鹏12123'447.52210139353817154.751男刘运硕03143'497.94190207397017566.461男吕晓瑶03143'547.75186117371017353.971男米孜聪06364'38.0619531557817883.181男聂浩然27194'017.752201510382117566.591男牛苗嘉12114'127.382451711442316753.9101男牛砚哲281347.822191311403117357.4111男齐子涵185x4'137.37228915435416354.6121男乔一甲06163'457.6620273223817961.1131男任晓波03113'467.6624537481117763.9141男戎小龙26330000000NaN151男桑淳熙06163'577.6019275414717459.2161男田晓龙24114'188.1421084424117961.9171男田玉聪27163'327.202552212532418363.4181男王晨宇06133'568.152071312436317360.5191男王家梁06303'478.152021316536417456201男王乐天33313'537.8521037344517756.9211男王一钊12133'577.8522092567017755.5221男王子天06343'427.232121215570918572.3231男王子鑫00124'37.68218153478017783.7241男未晓锟12144'148.30206151335817346.6251男张国瑞033x4'048.1520595349416948.3261男张皓天06324'047.55190125328616950.1271男张泽地03104'027.55240512448317158.4281男张智贤03183'577.89220911425416654.8291男赵博翰101x4'168.19212277349816968301男赵泽凯03114'017.89213511432217455.9311男赵泽宇06164'088.212081920391716651.9321男左晶川12174'068.71206114397017247.833班级性别姓名800米50米跳远体前屈仰卧起坐肺活量身高体重342男贾和06334'227.9721599386517558.7352男李森06360000000NaN362男李一帆18124'468.7917271475017488.6372男李子阳06184'017.3721027471418262.5382男吕星繁03120000000NaN392男赵凌云105x4'137.7720887432717356402男赵鹏悦26124'278.8185105474516474.841班级性别姓名1000米50米跳远体前屈引体肺活量身高体重423男宫诚博06123'436.89276161252121.8473.1433男郭亚浩181X4'047.2524013847561.7672443男郝晓辰00133'387.36246221144331.8462.5
# Drop the repeated header rows (rows whose '班级' cell is the literal
# column name) that are embedded in the sheet.
cond = data['班级'] != '班级'
# Take an explicit copy so that later in-place edits and column assignments
# operate on an independent frame instead of a slice of the original
# (which is what triggers SettingWithCopyWarning).
data = data[cond].copy()
data[:45]
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重01男高孜阳06114'138.88195121278517072.611男郝少杰10134'167.70225117313317452.721男郝梓烨06194'098.45218141390116946.531男何弘源10104'218.05206131494618379.741男刘硕鹏12123'447.52210139353817154.751男刘运硕03143'497.94190207397017566.461男吕晓瑶03143'547.75186117371017353.971男米孜聪06364'38.0619531557817883.181男聂浩然27194'017.752201510382117566.591男牛苗嘉12114'127.382451711442316753.9101男牛砚哲281347.822191311403117357.4111男齐子涵185x4'137.37228915435416354.6121男乔一甲06163'457.6620273223817961.1131男任晓波03113'467.6624537481117763.9141男戎小龙26330000000NaN151男桑淳熙06163'577.6019275414717459.2161男田晓龙24114'188.1421084424117961.9171男田玉聪27163'327.202552212532418363.4181男王晨宇06133'568.152071312436317360.5191男王家梁06303'478.152021316536417456201男王乐天33313'537.8521037344517756.9211男王一钊12133'577.8522092567017755.5221男王子天06343'427.232121215570918572.3231男王子鑫00124'37.68218153478017783.7241男未晓锟12144'148.30206151335817346.6251男张国瑞033x4'048.1520595349416948.3261男张皓天06324'047.55190125328616950.1271男张泽地03104'027.55240512448317158.4281男张智贤03183'577.89220911425416654.8291男赵博翰101x4'168.19212277349816968301男赵泽凯03114'017.89213511432217455.9311男赵泽宇06164'088.212081920391716651.9321男左晶川12174'068.71206114397017247.8342男贾和06334'227.9721599386517558.7352男李森06360000000NaN362男李一帆18124'468.7917271475017488.6372男李子阳06184'017.3721027471418262.5382男吕星繁03120000000NaN392男赵凌云105x4'137.7720887432717356402男赵鹏悦26124'278.8185105474516474.8423男宫诚博06123'436.89276161252121.8473.1433男郭亚浩181X4'047.2524013847561.7672443男郝晓辰00133'387.36246221144331.8462.5453男李国玺23104'198.1722018144381.7472.2463男李一帆12184'087.822715160331.7785.6
# Students who did not take part have missing values; record them as 0.
# Rebinding to the frame returned by fillna (instead of inplace=True) avoids
# mutating a filtered slice, which raised SettingWithCopyWarning.
data = data.fillna(0)
C:UsersLXQAnaconda3libsite-packagespandascoreframe.py:2754: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a Dataframe

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
# 没有空数据了
data.isnull().any()
班级       False
性别       False
姓名       False
1000米    False
50米      False
跳远       False
体前屈      False
引体       False
肺活量      False
身高       False
体重       False
dtype: bool
data.head()
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重01男高孜阳06114'138.88195.01212785170.072.611男郝少杰10134'167.70225.01173133174.052.721男郝梓烨06194'098.45218.01413901169.046.531男何弘源10104'218.05206.01314946183.079.741男刘硕鹏12123'447.52210.01393538171.054.7
def convert(x):
    """Turn a ``minutes'seconds`` time string into a comparable number.

    The result is ``minutes + seconds / 100`` (``"4'13"`` -> ``4.13``). This
    is not a decimal minute count; it is an encoding that keeps the ordering
    of times as long as the seconds part stays below 100.

    Args:
        x: A time string such as ``"4'13"``. A trailing ``"`` and a missing
            seconds part (``"4"``) are tolerated. Any non-string value is
            returned unchanged.

    Returns:
        A float for string input, otherwise ``x`` itself.

    Raises:
        ValueError: If the minutes or seconds part is not an integer.
    """
    if not isinstance(x, str):
        return x
    # partition (unlike split + unpacking) copes with a value that has no
    # "'" separator; the seconds part is then treated as 0.
    minute, _, second = x.strip().rstrip('"').partition("'")
    return int(minute) + (int(second) if second else 0) / 100.0
    
data['1000米'] = data['1000米'].map(convert)

data.head()
C:UsersLXQAnaconda3libsite-packagesipykernel_launcher.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a Dataframe.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重01男高孜阳06114.138.88195.01212785170.072.611男郝少杰10134.167.70225.01173133174.052.721男郝梓烨06194.098.45218.01413901169.046.531男何弘源10104.218.05206.01314946183.079.741男刘硕鹏12123.447.52210.01393538171.054.7
score = pd.read_excel('体侧成绩评分表.xls',header = [0,1])
score
男肺活量男肺活量女肺活量男50米跑女50米跑男体前屈女体前屈...女跳远男引体女仰卧男1000女800成绩分数成绩分数成绩分数成绩分数成绩分数成绩...成绩分数成绩分数成绩分数成绩分数成绩分数454010031501007.11007.810023.610024.2...20410016.0100531003'30"1003'24"1004420953100957.2957.99521.59522.5...1989515.09551953'35"953'30"954300903050907.3908.09019.49020.8...1929014.09049903'40"903'36"904050852900857.4858.38517.28519.1...1858513.08546853'47"853'43"853800802750807.5808.68015.08017.4...1788012.08043803'55"803'50"803680782650787.7788.87813.67816.1...17578NaN7841784'00"783'55"783560762550767.9769.07612.27614.8...1727611.07639764'05"764'00"763440742450748.1749.27410.87413.5...16974NaN7437744'10"744'05"743320722350728.3729.4729.47212.2...1667210.07235724'15"724'10"723200702250708.5709.6708.07010.9...16370NaN7033704'20"704'15"703080682150688.7689.8686.6689.6...160689.06831684'25"684'20"682960662050668.96610.0665.2668.3...15766NaN6629664'30"664'25"662840641950649.16410.2643.8647.0...154648.06427644'35"644'30"642720621850629.36210.4622.4625.7...15162NaN6225624'40"624'35"622600601750609.56010.6601.0604.4...148607.06023604'45"604'40"602470501710509.75010.8500.0503.6...143506.05021505'05"504'50"502340401670409.94011.040-1.0402.8...138405.04019405'25"405'00"4022103016303010.13011.230-2.0302.0...133304.03017305'45"305'10"3020802015902010.32011.420-3.0201.2...128203.02015206'05"205'20"2019501015501010.51011.610-4.0100.4...123102.01013106'25"105'30"10

20 rows × 23 columns

# Convert the boys' results
def convert(item):
    """Parse a ``m'ss"`` time string into ``m + ss / 100.0``.

    Surrounding ``"`` characters are stripped before the string is split on
    ``'``; both parts must be integers.
    """
    minutes, seconds = (int(part) for part in item.strip('"').split("'"))
    return minutes + seconds / 100.0
score.iloc[:,-4] = score.iloc[:,-4].map(convert)
# Convert the girls' results
def convert(item):
    """Parse a ``m'ss"`` time string into ``m + ss / 100.0``.

    Surrounding ``"`` characters are removed first, then the text is split
    on ``'`` into an integer minutes part and an integer seconds part.
    """
    pieces = item.strip('"').split("'")
    minute_text, second_text = pieces
    whole = int(minute_text)
    fraction = int(second_text) / 100.0
    return whole + fraction
score.iloc[:,-2] = score.iloc[:,-2].map(convert)
score

男肺活量男肺活量女肺活量男50米跑女50米跑男体前屈女体前屈...女跳远男引体女仰卧男1000女800成绩分数成绩分数成绩分数成绩分数成绩分数成绩...成绩分数成绩分数成绩分数成绩分数成绩分数454010031501007.11007.810023.610024.2...20410016.0100531003.301003.241004420953100957.2957.99521.59522.5...1989515.09551953.35953.30954300903050907.3908.09019.49020.8...1929014.09049903.40903.36904050852900857.4858.38517.28519.1...1858513.08546853.47853.43853800802750807.5808.68015.08017.4...1788012.08043803.55803.50803680782650787.7788.87813.67816.1...17578NaN7841784.00783.55783560762550767.9769.07612.27614.8...1727611.07639764.05764.00763440742450748.1749.27410.87413.5...16974NaN7437744.10744.05743320722350728.3729.4729.47212.2...1667210.07235724.15724.10723200702250708.5709.6708.07010.9...16370NaN7033704.20704.15703080682150688.7689.8686.6689.6...160689.06831684.25684.20682960662050668.96610.0665.2668.3...15766NaN6629664.30664.25662840641950649.16410.2643.8647.0...154648.06427644.35644.30642720621850629.36210.4622.4625.7...15162NaN6225624.40624.35622600601750609.56010.6601.0604.4...148607.06023604.45604.40602470501710509.75010.8500.0503.6...143506.05021505.05504.50502340401670409.94011.040-1.0402.8...138405.04019405.25405.004022103016303010.13011.230-2.0302.0...133304.03017305.45305.103020802015902010.32011.420-3.0201.2...128203.02015206.05205.202019501015501010.51011.610-4.0100.4...123102.01013106.25105.3010

20 rows × 23 columns

data.columns
Index(['班级', '性别', '姓名', '1000米', '50米', '跳远', '体前屈', '引体', '肺活量', '身高', '体重'], dtype='object')
data.columns = ['班级', '性别', '姓名', '男1000', '男50米跑', '跳远', '体前屈', '引体', '肺活量', '身高', '体重']
# Cast the 50 m column to float. The builtin ``float`` is used because the
# ``np.float`` alias was deprecated in NumPy 1.20 and removed in 1.24.
data['男50米跑'] = data['男50米跑'].astype(float)
C:UsersLXQAnaconda3libsite-packagesipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a Dataframe.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
score['男1000']
成绩成绩分数45403.3010044203.359543003.409040503.478538003.558036804.007835604.057634404.107433204.157232004.207030804.256829604.306628404.356427204.406226004.456024705.055023405.254022105.453020806.052019506.2510
for col in ['男1000', '男50米跑']:
    # Scoring standard for this event: '成绩' holds the time thresholds and
    # '分数' the points awarded for each threshold row.
    s = score[col]
    # Pull both columns out once so that scoring a value does not repeat
    # pandas lookups for every table row.
    thresholds = s['成绩'].tolist()
    points = s['分数'].tolist()

    def convert(x):
        """Return the points for time ``x`` (lower times score higher).

        Returns 0 when ``x`` is 0 (the event was not taken) or slower than
        the last threshold, 100 when ``x`` is at or below the first
        threshold, and otherwise the points of the first row whose
        threshold is >= ``x``. Returns None if no row matches (e.g. NaN).
        """
        if x <= thresholds[0]:
            # A recorded 0 means the student did not take part.
            return 0 if x == 0 else 100
        if x > thresholds[-1]:
            return 0  # slower than the slowest scored time
        # Walk consecutive threshold pairs; x falls in (lower, upper].
        for lower, upper, pts in zip(thresholds, thresholds[1:], points[1:]):
            if lower < x <= upper:
                return pts
        return None

    data[col + '成绩'] = data[col].map(convert)
C:UsersLXQAnaconda3libsite-packagesipykernel_launcher.py:16: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a Dataframe.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
data.head()
班级性别姓名男1000男50米跑跳远体前屈引体肺活量身高体重男1000成绩男50米跑成绩01男高孜阳06114.138.88195.01212785170.072.6726611男郝少杰10134.167.70225.01173133174.052.7707821男郝梓烨06194.098.45218.01413901169.046.5747031男何弘源10104.218.05206.01314946183.079.7687441男刘硕鹏12123.447.52210.01393538171.054.78578
for col in ['跳远', '体前屈', '引体']:
    # Boys' scoring standard for this event: '成绩' holds the thresholds and
    # '分数' the points awarded for each threshold row.
    s = score['男' + col]
    # Pair each threshold with its points once, outside the per-value work.
    bands = list(zip(s['成绩'].tolist(), s['分数'].tolist()))

    def convert(x):
        """Return the points of the first table row whose threshold ``x`` reaches.

        Rows are tested in table order with ``x >= threshold``; rows with a
        NaN threshold never match. Returns 0 when no row matches.
        """
        # NOTE(review): a raw 0 is also what absent students carry, and it
        # earns points whenever the table has thresholds <= 0 (absent rows
        # get 50 for '体前屈') -- confirm whether absent rows should be 0.
        for threshold, pts in bands:
            if x >= threshold:
                return pts
        return 0

    data[col + '成绩'] = data[col].map(convert)
C:UsersLXQAnaconda3libsite-packagesipykernel_launcher.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a Dataframe.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
data.head()
班级性别姓名男1000男50米跑跳远体前屈引体肺活量身高体重男1000成绩男50米跑成绩跳远成绩体前屈成绩引体成绩01男高孜阳06114.138.88195.01212785170.072.672666074011男郝少杰10134.167.70225.01173133174.052.7707874746021男郝梓烨06194.098.45218.01413901169.046.574707078031男何弘源10104.218.05206.01314946183.079.768746476041男刘硕鹏12123.447.52210.01393538171.054.78578667668
cols = ['班级', '性别', '姓名', '男1000','男1000成绩', '男50米跑', '男50米跑成绩',
 '跳远', '跳远成绩', '体前屈', '体前屈成绩',  '引体', '引体成绩','肺活量','身高','体重']
# Select the columns from the DataFrame in the order listed in cols
data = data[cols]
data.head()
班级性别姓名男1000男1000成绩男50米跑男50米跑成绩跳远跳远成绩体前屈体前屈成绩引体引体成绩肺活量身高体重01男高孜阳06114.13728.8866195.0601274102785170.072.611男郝少杰10134.16707.7078225.07411747603133174.052.721男郝梓烨06194.09748.4570218.0701478103901169.046.531男何弘源10104.21688.0574206.0641376104946183.079.741男刘硕鹏12123.44857.5278210.06613769683538171.054.7
data
班级性别姓名男1000男1000成绩男50米跑男50米跑成绩跳远跳远成绩体前屈体前屈成绩引体引体成绩肺活量身高体重01男高孜阳06114.13728.8866195.0601274102785170.072.611男郝少杰10134.16707.7078225.07411747603133174.052.721男郝梓烨06194.09748.4570218.0701478103901169.046.531男何弘源10104.21688.0574206.0641376104946183.079.741男刘硕鹏12123.44857.5278210.06613769683538171.054.751男刘运硕03143.49807.9474190.05020907603970175.066.461男吕晓瑶03143.54807.7576186.04011747603710173.053.971男米孜聪06364.03768.0674195.060362105578178.083.181男聂浩然27194.01767.7576220.072158010723821175.066.591男牛苗嘉12114.12727.3885245.085178011764423167.053.9101男牛砚哲28134.00787.8276219.072137611764031173.057.4111男齐子涵185x4.13727.3785228.07697015954354163.054.6121男乔一甲06163.45857.6678202.0627683202238179.061.1131男任晓波03113.46857.6678245.0853627604811177.063.9141男戎小龙26330.0000.0000.000500000.00.0151男桑淳熙06163.57787.6078192.0507685404147174.059.2161男田晓龙24114.18708.1472210.0668704304241179.061.9171男田玉聪27163.32957.2095255.090229512805324183.063.4181男王晨宇06133.56788.1572207.064137612804363173.060.5191男王家梁06303.47858.1572202.0621376161005364174.056.0201男王乐天33313.53807.8576210.0663627603445177.056.9211男王一钊12133.57787.8576220.0729702105670177.055.5221男王子天06343.42857.2390212.068127415955709185.072.3231男王子鑫00124.03767.6878218.07015803204780177.083.7241男未晓锟12144.14728.3072206.0641580103358173.046.6251男张国瑞033x4.04768.1572205.0649705403494169.048.3261男张皓天06324.04767.5578190.05012745403286169.050.1271男张泽地03104.02767.5578240.08056412804483171.058.4281男张智贤03183.57787.8976220.07297011764254166.054.8291男赵博翰101x4.16708.1972212.068271007603498169.068.0...................................................46317男王亚楠26364.15728.3670217.07020902105452175.083.446417男陈核涛26124.36627.2290267.010066611765555179.062.246517男曹佳尧213X3.48807.3785225.074178012805519176.062.246617男贾存生78123.58787.3785236.080127411764246169.060.146717男杨辰阳06314.02768.0074210.06618857604034167.056.846817男张雨康181X4.02768.0074196.06012744305738172.066.546917男刘帅怡03194.38628.0974223.07421908645168169.078.047017男张世荣03260.0000.0000.000500000.00.047117男刘泽阳181X
4.02768.3770208.06621908645677172.063.747217男王鹏鑫00144.26667.8976232.07821908647052180.082.947317男贾耀杰10344.09748.4670205.06415807604208171.061.047417男刘艺通06193.49807.6678232.078117410725897175.056.147517男段佳硕27374.36627.7776236.0801174201005158176.055.247617男刘鼎03154.37628.2772208.0661780106311177.095.647717男张浩27343.44858.2772217.07015807605075170.057.647817男庞慧谦07073.55807.9874212.068209010725564168.054.547917男李垚泽06153.41857.5778225.0749705405599181.074.848017男胡德皓36145.29309.0264210.0661274006712183.095.948117男张博03164.11727.5178238.080219014905590179.067.748217男张育森30114.56507.4280252.09097013855159180.070.048317男吴宜凯00153.54807.9674229.07614789685254182.064.148417男左一萌10150.0000.0000.000500000.00.048517男王鹏飞06154.04768.0274180.030870104592187.064.648617男张泽琼18153.54807.5178238.080137611765572176.059.548717男张晓波061X4.58508.7666200.06212749684533169.051.348817男张乔楠03114.23688.2772208.0661072004647176.069.548917男郭泽森03335.19409.5550210.06615806507042177.076.049017男陈子龙061X3.251007.5080252.090137613855755181.065.049117男王丹龙06364.39627.8176208.066147811765688172.051.749217男王玉涵06360.0000.0000.000500000.00.0

477 rows × 16 columns

def convert(x):
    """Return ``x / 100`` when ``x`` is greater than 100, else ``x`` unchanged.

    NOTE(review): presumably normalises heights entered in centimetres to
    metres while leaving values already in metres alone -- confirm.
    """
    return x / 100 if x > 100 else x
data['身高'] = data['身高'].map(convert)
data['BMI'] = (data['体重']/(data['身高'])**2).round(1)
'''≤16.4
23.3~26.3'''
def convert_bmi(x):  # a specific name such as convert_bmi is clearer than a generic convert
    """Map a BMI value to its score band.

    The bands are defined on BMI values with one decimal place, so ``x`` is
    rounded to one decimal first. Without that, an unrounded value falling
    between two bands (e.g. 16.46) would match none of them and score 0.

    Args:
        x: BMI as a number; NaN is accepted.

    Returns:
        60 for BMI >= 26.4, 80 for BMI <= 16.4 or within 23.3..26.3,
        100 for BMI within 16.5..23.2, and 0 when no band applies (NaN).
    """
    x = round(x, 1)
    if x >= 26.4:
        return 60
    if x <= 16.4 or 23.3 <= x <= 26.3:
        return 80
    if 16.5 <= x <= 23.2:
        return 100
    # Only reached for values that compare false everywhere, i.e. NaN.
    return 0

data['BMI_score'] = data['BMI'].map(convert_bmi)
data.head(50)
班级性别姓名男1000男1000成绩男50米跑男50米跑成绩跳远跳远成绩体前屈体前屈成绩引体引体成绩肺活量身高体重BMIBMI_score01男高孜阳06114.13728.8866195.06012741027851.7072.625.18011男郝少杰10134.16707.7078225.074117476031331.7452.717.410021男郝梓烨06194.09748.4570218.07014781039011.6946.516.38031男何弘源10104.21688.0574206.06413761049461.8379.723.88041男刘硕鹏12123.44857.5278210.066137696835381.7154.718.710051男刘运硕03143.49807.9474190.050209076039701.7566.421.710061男吕晓瑶03143.54807.7576186.040117476037101.7353.918.010071男米孜聪06364.03768.0674195.0603621055781.7883.126.28081男聂浩然27194.01767.7576220.0721580107238211.7566.521.710091男牛苗嘉12114.12727.3885245.0851780117644231.6753.919.3100101男牛砚哲28134.00787.8276219.0721376117640311.7357.419.2100111男齐子涵185x4.13727.3785228.076970159543541.6354.620.6100121男乔一甲06163.45857.6678202.06276832022381.7961.119.1100131男任晓波03113.46857.6678245.08536276048111.7763.920.4100141男戎小龙26330.0000.0000.000500000.000.0NaN0151男桑淳熙06163.57787.6078192.05076854041471.7459.219.6100161男田晓龙24114.18708.1472210.06687043042411.7961.919.3100171男田玉聪27163.32957.2095255.0902295128053241.8363.418.9100181男王晨宇06133.56788.1572207.0641376128043631.7360.520.2100191男王家梁06303.47858.1572202.06213761610053641.7456.018.5100201男王乐天33313.53807.8576210.06636276034451.7756.918.2100211男王一钊12133.57787.8576220.07297021056701.7755.517.7100221男王子天06343.42857.2390212.0681274159557091.8572.321.1100231男王子鑫00124.03767.6878218.070158032047801.7783.726.760241男未晓锟12144.14728.3072206.06415801033581.7346.615.680251男张国瑞033x4.04768.1572205.06497054034941.6948.316.9100261男张皓天06324.04767.5578190.050127454032861.6950.117.5100271男张泽地03104.02767.5578240.080564128044831.7158.420.0100281男张智贤03183.57787.8976220.072970117642541.6654.819.9100291男赵博翰101x4.16708.1972212.0682710076034981.6968.023.880301男赵泽凯03114.01767.8976213.068564117643221.7455.918.5100311男赵泽宇06164.08748.2172208.06619852010039171.6651.918.8100321男左晶川12174.06748.7166206.064117443039701.7247.816.280342男贾和06334.22687.9774215.07097096838651.7558.719.2100352男李森06360.0000.0000.000500000.000.0NaN0362男李一帆18124.46508.796
6172.0107681047501.7488.629.360372男李子阳06184.01767.3785210.06626076047141.8262.518.9100382男吕星繁03120.0000.0000.000500000.000.0NaN0392男赵凌云105x4.13727.7776208.06687076043271.7356.018.7100402男赵鹏悦26124.27668.8066185.040107254047451.6474.827.860423男宫诚博06123.43856.89100276.01001680128052121.8473.121.6100433男郭亚浩181X4.04767.2590240.080137686447561.7672.023.2100443男郝晓辰00133.38907.3685246.0852295117644331.8462.518.5100453男李国玺23104.19708.1772220.07218851044381.7472.223.880463男李一帆12184.08747.8076227.07615801060331.7785.627.360473男刘凡12184.09748.0674208.066107221041061.7068.723.880483男刘哲垚12174.09748.1672190.05026065042141.6760.721.8100493男米卓凡241X4.05768.1672200.062137696838571.7251.417.4100503男牛卓凡06144.02768.2772228.0761478128032661.6252.219.9100513男苏仕一12334.01768.5070215.07066696835781.6449.918.6100
# 统计分析
# 定义需求,画图,对比分析
(data['BMI_score'].value_counts()).plot(kind = 'pie',autopct = '%0.2f%%')

(data['BMI_score'].value_counts()).plot(kind = 'bar')


data.groupby(['男1000成绩'])['BMI_score'].count().plot(kind = 'bar')


欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/5156597.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-11-18
下一篇 2022-11-18

发表评论

登录后才能评论

评论列表(0条)

保存