sklearn代码18 python自动化处理数据
import numpy as np
import pandas as pd
from pandas import Series,Dataframe
import matplotlib.pyplot as plt
%matplotlib inline
data = pd.read_excel('./18级高一体测成绩汇总.xls')
data
| 班级 | 性别 | 姓名 | 1000米 | 50米 | 跳远 | 体前屈 | 引体 | 肺活量 | 身高 | 体重 |
---|
0 | 1男高孜阳06114'138.88195121278517072.6
---|
1 | 1男郝少杰10134'167.70225117313317452.7
---|
2 | 1男郝梓烨06194'098.45218141390116946.5
---|
3 | 1男何弘源10104'218.05206131494618379.7
---|
4 | 1男刘硕鹏12123'447.52210139353817154.7
---|
5 | 1男刘运硕03143'497.94190207397017566.4
---|
6 | 1男吕晓瑶03143'547.75186117371017353.9
---|
7 | 1男米孜聪06364'38.0619531557817883.1
---|
8 | 1男聂浩然27194'017.752201510382117566.5
---|
9 | 1男牛苗嘉12114'127.382451711442316753.9
---|
10 | 1男牛砚哲281347.822191311403117357.4
---|
11 | 1男齐子涵185x4'137.37228915435416354.6
---|
12 | 1男乔一甲06163'457.6620273223817961.1
---|
13 | 1男任晓波03113'467.6624537481117763.9
---|
14 | 1男戎小龙26330000000NaN
---|
15 | 1男桑淳熙06163'577.6019275414717459.2
---|
16 | 1男田晓龙24114'188.1421084424117961.9
---|
17 | 1男田玉聪27163'327.202552212532418363.4
---|
18 | 1男王晨宇06133'568.152071312436317360.5
---|
19 | 1男王家梁06303'478.152021316536417456
---|
20 | 1男王乐天33313'537.8521037344517756.9
---|
21 | 1男王一钊12133'577.8522092567017755.5
---|
22 | 1男王子天06343'427.232121215570918572.3
---|
23 | 1男王子鑫00124'37.68218153478017783.7
---|
24 | 1男未晓锟12144'148.30206151335817346.6
---|
25 | 1男张国瑞033x4'048.1520595349416948.3
---|
26 | 1男张皓天06324'047.55190125328616950.1
---|
27 | 1男张泽地03104'027.55240512448317158.4
---|
28 | 1男张智贤03183'577.89220911425416654.8
---|
29 | 1男赵博翰101x4'168.19212277349816968
---|
... | .................................
---|
463 | 17男王亚楠26364'158.36217202545217583.4
---|
464 | 17男陈核涛26124'367.22267611555517962.2
---|
465 | 17男曹佳尧213X3'487.372251712551917662.2
---|
466 | 17男贾存生78123'587.372361211424616960.1
---|
467 | 17男杨辰阳06314'028210187403416756.8
---|
468 | 17男张雨康181X4'028196124573817266.5
---|
469 | 17男刘帅怡03194'388.09223218516816978
---|
470 | 17男张世荣0326NaNNaNNaNNaNNaNNaNNaNNaN
---|
471 | 17男刘泽阳181X4'28.37208218567717263.7
---|
472 | 17男王鹏鑫00144'267.89232218705218082.9
---|
473 | 17男贾耀杰10344'098.46205157420817161
---|
474 | 17男刘艺通06193'497.662321110589717556.1
---|
475 | 17男段佳硕27374'367.772361120515817655.2
---|
476 | 17男刘鼎03154'378.27208171631117795.6
---|
477 | 17男张浩27343'448.27217157507517057.6
---|
478 | 17男庞慧谦07073'557.982122010556416854.5
---|
479 | 17男李垚泽06153'417.5722595559918174.8
---|
480 | 17男胡德皓36145'299.02210120671218395.9
---|
481 | 17男张博03164'117.512382114559017967.7
---|
482 | 17男张育森30114'567.42252913515918070
---|
483 | 17男吴宜凯00153'547.96229149525418264.1
---|
484 | 17男左一萌1015NaNNaNNaNNaNNaNNaNNaNNaN
---|
485 | 17男王鹏飞06154'48.0218081459218764.6
---|
486 | 17男张泽琼18153'547.512381311557217659.5
---|
487 | 17男张晓波061X4'588.76200129453316951.3
---|
488 | 17男张乔楠03114'238.27208100464717669.5
---|
489 | 17男郭泽森03335'199.55210156704217776
---|
490 | 17男陈子龙061X3'257.52521313575518165
---|
491 | 17男王丹龙06364'397.812081411568817251.7
---|
492 | 17男王玉涵0636NaNNaNNaNNaNNaNNaNNaNNaN
493 rows × 11 columns
# Inspect the first 45 rows; the output below shows repeated header rows
# embedded in the data (index 33 and 41).
data[:45]
| 班级 | 性别 | 姓名 | 1000米 | 50米 | 跳远 | 体前屈 | 引体 | 肺活量 | 身高 | 体重 |
---|
0 | 1男高孜阳06114'138.88195121278517072.6
---|
1 | 1男郝少杰10134'167.70225117313317452.7
---|
2 | 1男郝梓烨06194'098.45218141390116946.5
---|
3 | 1男何弘源10104'218.05206131494618379.7
---|
4 | 1男刘硕鹏12123'447.52210139353817154.7
---|
5 | 1男刘运硕03143'497.94190207397017566.4
---|
6 | 1男吕晓瑶03143'547.75186117371017353.9
---|
7 | 1男米孜聪06364'38.0619531557817883.1
---|
8 | 1男聂浩然27194'017.752201510382117566.5
---|
9 | 1男牛苗嘉12114'127.382451711442316753.9
---|
10 | 1男牛砚哲281347.822191311403117357.4
---|
11 | 1男齐子涵185x4'137.37228915435416354.6
---|
12 | 1男乔一甲06163'457.6620273223817961.1
---|
13 | 1男任晓波03113'467.6624537481117763.9
---|
14 | 1男戎小龙26330000000NaN
---|
15 | 1男桑淳熙06163'577.6019275414717459.2
---|
16 | 1男田晓龙24114'188.1421084424117961.9
---|
17 | 1男田玉聪27163'327.202552212532418363.4
---|
18 | 1男王晨宇06133'568.152071312436317360.5
---|
19 | 1男王家梁06303'478.152021316536417456
---|
20 | 1男王乐天33313'537.8521037344517756.9
---|
21 | 1男王一钊12133'577.8522092567017755.5
---|
22 | 1男王子天06343'427.232121215570918572.3
---|
23 | 1男王子鑫00124'37.68218153478017783.7
---|
24 | 1男未晓锟12144'148.30206151335817346.6
---|
25 | 1男张国瑞033x4'048.1520595349416948.3
---|
26 | 1男张皓天06324'047.55190125328616950.1
---|
27 | 1男张泽地03104'027.55240512448317158.4
---|
28 | 1男张智贤03183'577.89220911425416654.8
---|
29 | 1男赵博翰101x4'168.19212277349816968
---|
30 | 1男赵泽凯03114'017.89213511432217455.9
---|
31 | 1男赵泽宇06164'088.212081920391716651.9
---|
32 | 1男左晶川12174'068.71206114397017247.8
---|
33 | 班级性别姓名800米50米跳远体前屈仰卧起坐肺活量身高体重
---|
34 | 2男贾和06334'227.9721599386517558.7
---|
35 | 2男李森06360000000NaN
---|
36 | 2男李一帆18124'468.7917271475017488.6
---|
37 | 2男李子阳06184'017.3721027471418262.5
---|
38 | 2男吕星繁03120000000NaN
---|
39 | 2男赵凌云105x4'137.7720887432717356
---|
40 | 2男赵鹏悦26124'278.8185105474516474.8
---|
41 | 班级性别姓名1000米50米跳远体前屈引体肺活量身高体重
---|
42 | 3男宫诚博06123'436.89276161252121.8473.1
---|
43 | 3男郭亚浩181X4'047.2524013847561.7672
---|
44 | 3男郝晓辰00133'387.36246221144331.8462.5
# Drop the repeated header rows: in those rows the 班级 cell holds the
# literal column title instead of a class number.
cond = data['班级'] != '班级'
# Take an explicit copy so later assignments modify an independent frame
# rather than a slice of the original (avoids SettingWithCopyWarning).
data = data[cond].copy()
data[:45]
| 班级 | 性别 | 姓名 | 1000米 | 50米 | 跳远 | 体前屈 | 引体 | 肺活量 | 身高 | 体重 |
---|
0 | 1男高孜阳06114'138.88195121278517072.6
---|
1 | 1男郝少杰10134'167.70225117313317452.7
---|
2 | 1男郝梓烨06194'098.45218141390116946.5
---|
3 | 1男何弘源10104'218.05206131494618379.7
---|
4 | 1男刘硕鹏12123'447.52210139353817154.7
---|
5 | 1男刘运硕03143'497.94190207397017566.4
---|
6 | 1男吕晓瑶03143'547.75186117371017353.9
---|
7 | 1男米孜聪06364'38.0619531557817883.1
---|
8 | 1男聂浩然27194'017.752201510382117566.5
---|
9 | 1男牛苗嘉12114'127.382451711442316753.9
---|
10 | 1男牛砚哲281347.822191311403117357.4
---|
11 | 1男齐子涵185x4'137.37228915435416354.6
---|
12 | 1男乔一甲06163'457.6620273223817961.1
---|
13 | 1男任晓波03113'467.6624537481117763.9
---|
14 | 1男戎小龙26330000000NaN
---|
15 | 1男桑淳熙06163'577.6019275414717459.2
---|
16 | 1男田晓龙24114'188.1421084424117961.9
---|
17 | 1男田玉聪27163'327.202552212532418363.4
---|
18 | 1男王晨宇06133'568.152071312436317360.5
---|
19 | 1男王家梁06303'478.152021316536417456
---|
20 | 1男王乐天33313'537.8521037344517756.9
---|
21 | 1男王一钊12133'577.8522092567017755.5
---|
22 | 1男王子天06343'427.232121215570918572.3
---|
23 | 1男王子鑫00124'37.68218153478017783.7
---|
24 | 1男未晓锟12144'148.30206151335817346.6
---|
25 | 1男张国瑞033x4'048.1520595349416948.3
---|
26 | 1男张皓天06324'047.55190125328616950.1
---|
27 | 1男张泽地03104'027.55240512448317158.4
---|
28 | 1男张智贤03183'577.89220911425416654.8
---|
29 | 1男赵博翰101x4'168.19212277349816968
---|
30 | 1男赵泽凯03114'017.89213511432217455.9
---|
31 | 1男赵泽宇06164'088.212081920391716651.9
---|
32 | 1男左晶川12174'068.71206114397017247.8
---|
34 | 2男贾和06334'227.9721599386517558.7
---|
35 | 2男李森06360000000NaN
---|
36 | 2男李一帆18124'468.7917271475017488.6
---|
37 | 2男李子阳06184'017.3721027471418262.5
---|
38 | 2男吕星繁03120000000NaN
---|
39 | 2男赵凌云105x4'137.7720887432717356
---|
40 | 2男赵鹏悦26124'278.8185105474516474.8
---|
42 | 3男宫诚博06123'436.89276161252121.8473.1
---|
43 | 3男郭亚浩181X4'047.2524013847561.7672
---|
44 | 3男郝晓辰00133'387.36246221144331.8462.5
---|
45 | 3男李国玺23104'198.1722018144381.7472.2
---|
46 | 3男李一帆12184'087.822715160331.7785.6
# Students who did not take a test have empty cells; record them as 0.
# Rebinding the result (instead of inplace=True on a filtered slice) avoids
# the SettingWithCopyWarning that the in-place call triggers.
data = data.fillna(0)
C:\Users\LXQ\Anaconda3\lib\site-packages\pandas\core\frame.py:2754: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
downcast=downcast, **kwargs)
# Verify that no column contains missing values any more.
data.isnull().any()
班级 False
性别 False
姓名 False
1000米 False
50米 False
跳远 False
体前屈 False
引体 False
肺活量 False
身高 False
体重 False
dtype: bool
# Preview the first five rows after filling missing values.
data.head()
| 班级 | 性别 | 姓名 | 1000米 | 50米 | 跳远 | 体前屈 | 引体 | 肺活量 | 身高 | 体重 |
---|
0 | 1男高孜阳06114'138.88195.01212785170.072.6
---|
1 | 1男郝少杰10134'167.70225.01173133174.052.7
---|
2 | 1男郝梓烨06194'098.45218.01413901169.046.5
---|
3 | 1男何弘源10104'218.05206.01314946183.079.7
---|
4 | 1男刘硕鹏12123'447.52210.01393538171.054.7
def convert(x):  # turn a raw 1000 m entry into a comparable number
    """Parse a running time written as ``M'SS`` into a number.

    The result is ``minutes + seconds / 100`` (e.g. ``"4'13"`` -> 4.13).
    This is a "minutes.seconds" encoding, not decimal minutes; it keeps the
    ordering of times and matches how the scoring table's thresholds are
    converted elsewhere in this file.

    Non-string values (already numeric cells, such as the 0 used for
    absent students) are returned unchanged. A trailing ``"`` and
    surrounding whitespace are ignored, and a missing seconds part counts
    as 0 seconds. A string whose minutes/seconds are not integers raises
    ValueError.
    """
    if not isinstance(x, str):
        return x
    minute, _, second = x.strip().strip('"').partition("'")
    return int(minute) + int(second or 0) / 100.0
# Replace the raw 1000 m entries with their numeric form, then preview.
data['1000米'] = data['1000米'].map(convert)
data.head()
C:\Users\LXQ\Anaconda3\lib\site-packages\ipykernel_launcher.py:10: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
# Remove the CWD from sys.path while we load stuff.
| 班级 | 性别 | 姓名 | 1000米 | 50米 | 跳远 | 体前屈 | 引体 | 肺活量 | 身高 | 体重 |
---|
0 | 1男高孜阳06114.138.88195.01212785170.072.6
---|
1 | 1男郝少杰10134.167.70225.01173133174.052.7
---|
2 | 1男郝梓烨06194.098.45218.01413901169.046.5
---|
3 | 1男何弘源10104.218.05206.01314946183.079.7
---|
4 | 1男刘硕鹏12123.447.52210.01393538171.054.7
# Load the scoring standard; the sheet has a two-row header (event name on
# the first row, 成绩/分数 on the second), so columns form a MultiIndex.
score = pd.read_excel('体侧成绩评分表.xls',header = [0,1])
score
男肺活量 | 男肺活量 | 女肺活量 | 男50米跑 | 女50米跑 | 男体前屈 | 女体前屈 | ... | 女跳远 | 男引体 | 女仰卧 | 男1000 | 女800 |
---|
成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | ... | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 |
---|
4540 | 10031501007.11007.810023.610024.2...20410016.0100531003'30"1003'24"100
---|
4420 | 953100957.2957.99521.59522.5...1989515.09551953'35"953'30"95
---|
4300 | 903050907.3908.09019.49020.8...1929014.09049903'40"903'36"90
---|
4050 | 852900857.4858.38517.28519.1...1858513.08546853'47"853'43"85
---|
3800 | 802750807.5808.68015.08017.4...1788012.08043803'55"803'50"80
---|
3680 | 782650787.7788.87813.67816.1...17578NaN7841784'00"783'55"78
---|
3560 | 762550767.9769.07612.27614.8...1727611.07639764'05"764'00"76
---|
3440 | 742450748.1749.27410.87413.5...16974NaN7437744'10"744'05"74
---|
3320 | 722350728.3729.4729.47212.2...1667210.07235724'15"724'10"72
---|
3200 | 702250708.5709.6708.07010.9...16370NaN7033704'20"704'15"70
---|
3080 | 682150688.7689.8686.6689.6...160689.06831684'25"684'20"68
---|
2960 | 662050668.96610.0665.2668.3...15766NaN6629664'30"664'25"66
---|
2840 | 641950649.16410.2643.8647.0...154648.06427644'35"644'30"64
---|
2720 | 621850629.36210.4622.4625.7...15162NaN6225624'40"624'35"62
---|
2600 | 601750609.56010.6601.0604.4...148607.06023604'45"604'40"60
---|
2470 | 501710509.75010.8500.0503.6...143506.05021505'05"504'50"50
---|
2340 | 401670409.94011.040-1.0402.8...138405.04019405'25"405'00"40
---|
2210 | 3016303010.13011.230-2.0302.0...133304.03017305'45"305'10"30
---|
2080 | 2015902010.32011.420-3.0201.2...128203.02015206'05"205'20"20
---|
1950 | 1015501010.51011.610-4.0100.4...123102.01013106'25"105'30"10
20 rows × 23 columns
# Convert the boys' running thresholds in the scoring table.
def convert(item):
    """Parse a threshold written as ``M'SS"`` into ``minutes + seconds / 100``.

    Example: ``3'30"`` -> 3.30. Non-string values are returned unchanged,
    so re-running the cell on an already-converted column is harmless
    instead of raising AttributeError.
    """
    if not isinstance(item, str):
        return item
    m, s = item.strip('"').split("'")
    return int(m) + int(s) / 100.0
# Convert the fourth-from-last column of the scoring table; per the table
# printed above this is the 男1000 成绩 (threshold) column.
score.iloc[:,-4] = score.iloc[:,-4].map(convert)
# Convert the girls' running thresholds (same parsing as for the boys' column).
def convert(item):
    """Parse a threshold written as ``M'SS"`` into ``minutes + seconds / 100``.

    Example: ``3'24"`` -> 3.24. Non-string values are returned unchanged,
    so re-running the cell on an already-converted column is harmless
    instead of raising AttributeError.
    """
    if not isinstance(item, str):
        return item
    m, s = item.strip('"').split("'")
    return int(m) + int(s) / 100.0
# Convert the second-from-last column of the scoring table; per the table
# printed above this is the 女800 成绩 (threshold) column. Then echo the table.
score.iloc[:,-2] = score.iloc[:,-2].map(convert)
score
男肺活量 | 男肺活量 | 女肺活量 | 男50米跑 | 女50米跑 | 男体前屈 | 女体前屈 | ... | 女跳远 | 男引体 | 女仰卧 | 男1000 | 女800 |
---|
成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | ... | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 | 成绩 | 分数 |
---|
4540 | 10031501007.11007.810023.610024.2...20410016.0100531003.301003.24100
---|
4420 | 953100957.2957.99521.59522.5...1989515.09551953.35953.3095
---|
4300 | 903050907.3908.09019.49020.8...1929014.09049903.40903.3690
---|
4050 | 852900857.4858.38517.28519.1...1858513.08546853.47853.4385
---|
3800 | 802750807.5808.68015.08017.4...1788012.08043803.55803.5080
---|
3680 | 782650787.7788.87813.67816.1...17578NaN7841784.00783.5578
---|
3560 | 762550767.9769.07612.27614.8...1727611.07639764.05764.0076
---|
3440 | 742450748.1749.27410.87413.5...16974NaN7437744.10744.0574
---|
3320 | 722350728.3729.4729.47212.2...1667210.07235724.15724.1072
---|
3200 | 702250708.5709.6708.07010.9...16370NaN7033704.20704.1570
---|
3080 | 682150688.7689.8686.6689.6...160689.06831684.25684.2068
---|
2960 | 662050668.96610.0665.2668.3...15766NaN6629664.30664.2566
---|
2840 | 641950649.16410.2643.8647.0...154648.06427644.35644.3064
---|
2720 | 621850629.36210.4622.4625.7...15162NaN6225624.40624.3562
---|
2600 | 601750609.56010.6601.0604.4...148607.06023604.45604.4060
---|
2470 | 501710509.75010.8500.0503.6...143506.05021505.05504.5050
---|
2340 | 401670409.94011.040-1.0402.8...138405.04019405.25405.0040
---|
2210 | 3016303010.13011.230-2.0302.0...133304.03017305.45305.1030
---|
2080 | 2015902010.32011.420-3.0201.2...128203.02015206.05205.2020
---|
1950 | 1015501010.51011.610-4.0100.4...123102.01013106.25105.3010
20 rows × 23 columns
# List the current column labels before renaming them.
data.columns
Index(['班级', '性别', '姓名', '1000米', '50米', '跳远', '体前屈', '引体', '肺活量', '身高', '体重'], dtype='object')
# Rename the two timed events to the labels used by the scoring table
# (男1000, 男50米跑) so the same key indexes both `data` and `score`.
data.columns = ['班级', '性别', '姓名', '男1000', '男50米跑', '跳远', '体前屈', '引体', '肺活量', '身高', '体重']
# Fixed: `np.float` was only an alias of the builtin `float` and has been
# removed from NumPy (AttributeError on current releases); use `float`.
data['男50米跑'] = data['男50米跑'].astype(float)
C:\Users\LXQ\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
"""Entry point for launching an IPython kernel.
# Show the 成绩 (threshold) / 分数 (points) pair for the boys' 1000 m.
score['男1000']
成绩 | 成绩 | 分数 |
---|
4540 | 3.30100
---|
4420 | 3.3595
---|
4300 | 3.4090
---|
4050 | 3.4785
---|
3800 | 3.5580
---|
3680 | 4.0078
---|
3560 | 4.0576
---|
3440 | 4.1074
---|
3320 | 4.1572
---|
3200 | 4.2070
---|
3080 | 4.2568
---|
2960 | 4.3066
---|
2840 | 4.3564
---|
2720 | 4.4062
---|
2600 | 4.4560
---|
2470 | 5.0550
---|
2340 | 5.2540
---|
2210 | 5.4530
---|
2080 | 6.0520
---|
1950 | 6.2510
for col in ['男1000', '男50米跑']:
    # Scoring standard for this event: 成绩 holds the time thresholds
    # (ascending, a smaller time is better) and 分数 the matching points.
    s = score[col]
    # Extract both columns once instead of calling .iloc inside every
    # comparison for every student.
    limits = s['成绩'].tolist()
    points = s['分数'].tolist()

    # The lists are bound as defaults so each iteration's convert keeps its
    # own table (no late-binding closure on the loop variable).
    def convert(x, limits=limits, points=points):
        """Return the points earned for time ``x`` in the current event.

        0 means the student did not take part and scores 0. A time at or
        below the first threshold scores 100; a time above the last
        threshold scores 0. Otherwise the score is that of the first band
        whose threshold is >= x. Values that match no band (e.g. NaN)
        score 0 instead of silently yielding None.
        """
        if x == 0:
            return 0  # did not take part in this event
        if x <= limits[0]:
            return 100
        if x > limits[-1]:
            return 0  # slower than the lowest scoring band
        for limit, point in zip(limits[1:], points[1:]):
            if x <= limit:
                return point
        return 0  # not comparable with any threshold (e.g. NaN)

    data[col + '成绩'] = data[col].map(convert)
C:\Users\LXQ\Anaconda3\lib\site-packages\ipykernel_launcher.py:16: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
# Preview the two new score columns for the timed events.
data.head()
| 班级 | 性别 | 姓名 | 男1000 | 男50米跑 | 跳远 | 体前屈 | 引体 | 肺活量 | 身高 | 体重 | 男1000成绩 | 男50米跑成绩 |
---|
0 | 1男高孜阳06114.138.88195.01212785170.072.67266
---|
1 | 1男郝少杰10134.167.70225.01173133174.052.77078
---|
2 | 1男郝梓烨06194.098.45218.01413901169.046.57470
---|
3 | 1男何弘源10104.218.05206.01314946183.079.76874
---|
4 | 1男刘硕鹏12123.447.52210.01393538171.054.78578
for col in ['跳远', '体前屈', '引体']:
    # Boys' standard for this event, looked up under the '男' + event label.
    s = score['男'+col]

    def convert(x):
        """Return the points of the first band whose threshold ``x`` reaches.

        Bands are scanned in table order; a band whose threshold is NaN
        never matches. If no band is reached the score is 0.
        """
        # NOTE(review): zero-filled results of absent students are scored
        # like a genuine 0 here (the output shows 体前屈 0 -> 50), unlike the
        # timed events where 0 maps to 0 - confirm this is intended.
        for threshold, points in zip(s['成绩'], s['分数']):
            if x >= threshold:
                return points
        return 0

    data[col + '成绩'] = data[col].map(convert)
C:\Users\LXQ\Anaconda3\lib\site-packages\ipykernel_launcher.py:10: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
# Remove the CWD from sys.path while we load stuff.
# Preview the three new score columns (跳远成绩, 体前屈成绩, 引体成绩).
data.head()
| 班级 | 性别 | 姓名 | 男1000 | 男50米跑 | 跳远 | 体前屈 | 引体 | 肺活量 | 身高 | 体重 | 男1000成绩 | 男50米跑成绩 | 跳远成绩 | 体前屈成绩 | 引体成绩 |
---|
0 | 1男高孜阳06114.138.88195.01212785170.072.6726660740
---|
1 | 1男郝少杰10134.167.70225.01173133174.052.77078747460
---|
2 | 1男郝梓烨06194.098.45218.01413901169.046.5747070780
---|
3 | 1男何弘源10104.218.05206.01314946183.079.7687464760
---|
4 | 1男刘硕鹏12123.447.52210.01393538171.054.78578667668
# Desired column order: each raw result is followed by its score column.
cols = ['班级', '性别', '姓名', '男1000','男1000成绩', '男50米跑', '男50米跑成绩',
'跳远', '跳远成绩', '体前屈', '体前屈成绩', '引体', '引体成绩','肺活量','身高','体重']
# Select the columns from the DataFrame in the order listed above.
data = data[cols]
data.head()
| 班级 | 性别 | 姓名 | 男1000 | 男1000成绩 | 男50米跑 | 男50米跑成绩 | 跳远 | 跳远成绩 | 体前屈 | 体前屈成绩 | 引体 | 引体成绩 | 肺活量 | 身高 | 体重 |
---|
0 | 1男高孜阳06114.13728.8866195.0601274102785170.072.6
---|
1 | 1男郝少杰10134.16707.7078225.07411747603133174.052.7
---|
2 | 1男郝梓烨06194.09748.4570218.0701478103901169.046.5
---|
3 | 1男何弘源10104.21688.0574206.0641376104946183.079.7
---|
4 | 1男刘硕鹏12123.44857.5278210.06613769683538171.054.7
# Echo the whole reordered table.
data
| 班级 | 性别 | 姓名 | 男1000 | 男1000成绩 | 男50米跑 | 男50米跑成绩 | 跳远 | 跳远成绩 | 体前屈 | 体前屈成绩 | 引体 | 引体成绩 | 肺活量 | 身高 | 体重 |
---|
0 | 1男高孜阳06114.13728.8866195.0601274102785170.072.6
---|
1 | 1男郝少杰10134.16707.7078225.07411747603133174.052.7
---|
2 | 1男郝梓烨06194.09748.4570218.0701478103901169.046.5
---|
3 | 1男何弘源10104.21688.0574206.0641376104946183.079.7
---|
4 | 1男刘硕鹏12123.44857.5278210.06613769683538171.054.7
---|
5 | 1男刘运硕03143.49807.9474190.05020907603970175.066.4
---|
6 | 1男吕晓瑶03143.54807.7576186.04011747603710173.053.9
---|
7 | 1男米孜聪06364.03768.0674195.060362105578178.083.1
---|
8 | 1男聂浩然27194.01767.7576220.072158010723821175.066.5
---|
9 | 1男牛苗嘉12114.12727.3885245.085178011764423167.053.9
---|
10 | 1男牛砚哲28134.00787.8276219.072137611764031173.057.4
---|
11 | 1男齐子涵185x4.13727.3785228.07697015954354163.054.6
---|
12 | 1男乔一甲06163.45857.6678202.0627683202238179.061.1
---|
13 | 1男任晓波03113.46857.6678245.0853627604811177.063.9
---|
14 | 1男戎小龙26330.0000.0000.000500000.00.0
---|
15 | 1男桑淳熙06163.57787.6078192.0507685404147174.059.2
---|
16 | 1男田晓龙24114.18708.1472210.0668704304241179.061.9
---|
17 | 1男田玉聪27163.32957.2095255.090229512805324183.063.4
---|
18 | 1男王晨宇06133.56788.1572207.064137612804363173.060.5
---|
19 | 1男王家梁06303.47858.1572202.0621376161005364174.056.0
---|
20 | 1男王乐天33313.53807.8576210.0663627603445177.056.9
---|
21 | 1男王一钊12133.57787.8576220.0729702105670177.055.5
---|
22 | 1男王子天06343.42857.2390212.068127415955709185.072.3
---|
23 | 1男王子鑫00124.03767.6878218.07015803204780177.083.7
---|
24 | 1男未晓锟12144.14728.3072206.0641580103358173.046.6
---|
25 | 1男张国瑞033x4.04768.1572205.0649705403494169.048.3
---|
26 | 1男张皓天06324.04767.5578190.05012745403286169.050.1
---|
27 | 1男张泽地03104.02767.5578240.08056412804483171.058.4
---|
28 | 1男张智贤03183.57787.8976220.07297011764254166.054.8
---|
29 | 1男赵博翰101x4.16708.1972212.068271007603498169.068.0
---|
... | ................................................
---|
463 | 17男王亚楠26364.15728.3670217.07020902105452175.083.4
---|
464 | 17男陈核涛26124.36627.2290267.010066611765555179.062.2
---|
465 | 17男曹佳尧213X3.48807.3785225.074178012805519176.062.2
---|
466 | 17男贾存生78123.58787.3785236.080127411764246169.060.1
---|
467 | 17男杨辰阳06314.02768.0074210.06618857604034167.056.8
---|
468 | 17男张雨康181X4.02768.0074196.06012744305738172.066.5
---|
469 | 17男刘帅怡03194.38628.0974223.07421908645168169.078.0
---|
470 | 17男张世荣03260.0000.0000.000500000.00.0
---|
471 | 17男刘泽阳181X4.02768.3770208.06621908645677172.063.7
---|
472 | 17男王鹏鑫00144.26667.8976232.07821908647052180.082.9
---|
473 | 17男贾耀杰10344.09748.4670205.06415807604208171.061.0
---|
474 | 17男刘艺通06193.49807.6678232.078117410725897175.056.1
---|
475 | 17男段佳硕27374.36627.7776236.0801174201005158176.055.2
---|
476 | 17男刘鼎03154.37628.2772208.0661780106311177.095.6
---|
477 | 17男张浩27343.44858.2772217.07015807605075170.057.6
---|
478 | 17男庞慧谦07073.55807.9874212.068209010725564168.054.5
---|
479 | 17男李垚泽06153.41857.5778225.0749705405599181.074.8
---|
480 | 17男胡德皓36145.29309.0264210.0661274006712183.095.9
---|
481 | 17男张博03164.11727.5178238.080219014905590179.067.7
---|
482 | 17男张育森30114.56507.4280252.09097013855159180.070.0
---|
483 | 17男吴宜凯00153.54807.9674229.07614789685254182.064.1
---|
484 | 17男左一萌10150.0000.0000.000500000.00.0
---|
485 | 17男王鹏飞06154.04768.0274180.030870104592187.064.6
---|
486 | 17男张泽琼18153.54807.5178238.080137611765572176.059.5
---|
487 | 17男张晓波061X4.58508.7666200.06212749684533169.051.3
---|
488 | 17男张乔楠03114.23688.2772208.0661072004647176.069.5
---|
489 | 17男郭泽森03335.19409.5550210.06615806507042177.076.0
---|
490 | 17男陈子龙061X3.251007.5080252.090137613855755181.065.0
---|
491 | 17男王丹龙06364.39627.8176208.066147811765688172.051.7
---|
492 | 17男王玉涵06360.0000.0000.000500000.00.0
477 rows × 16 columns
def convert(x):
    """Normalise a height to metres.

    Values above 100 are taken to be centimetres and divided by 100;
    anything else (heights already in metres, or the 0 used for missing
    data) is returned unchanged.
    """
    if x > 100:
        return x / 100
    return x
# Bring every height to metres, then compute BMI = weight / height**2,
# rounded to one decimal. Rows with height 0 and weight 0 give NaN
# (visible as NaN in the BMI column of the output below).
data['身高'] = data['身高'].map(convert)
data['BMI'] = (data['体重']/(data['身高'])**2).round(1)
def convert_bmi(x):  # a precise name such as convert_bmi is better than a generic convert
    """Map a BMI value to its score.

    Bands, applied to the BMI rounded to one decimal:
        <= 16.4          -> 80
        16.5 .. 23.2     -> 100
        23.3 .. 26.3     -> 80
        >= 26.4          -> 60

    NaN or infinite input (e.g. a BMI computed from a zero height)
    scores 0. Rounding inside the function closes the gaps between the
    bands, so an unrounded value such as 16.45 no longer falls through
    to 0.
    """
    import math

    if not math.isfinite(x):
        return 0  # no usable height/weight data
    x = round(x, 1)
    if x >= 26.4:
        return 60
    if x >= 23.3:
        return 80
    if x >= 16.5:
        return 100
    return 80
# Score every student's BMI and preview the first 50 rows.
data['BMI_score'] = data['BMI'].map(convert_bmi)
data.head(50)
| 班级 | 性别 | 姓名 | 男1000 | 男1000成绩 | 男50米跑 | 男50米跑成绩 | 跳远 | 跳远成绩 | 体前屈 | 体前屈成绩 | 引体 | 引体成绩 | 肺活量 | 身高 | 体重 | BMI | BMI_score |
---|
0 | 1男高孜阳06114.13728.8866195.06012741027851.7072.625.180
---|
1 | 1男郝少杰10134.16707.7078225.074117476031331.7452.717.4100
---|
2 | 1男郝梓烨06194.09748.4570218.07014781039011.6946.516.380
---|
3 | 1男何弘源10104.21688.0574206.06413761049461.8379.723.880
---|
4 | 1男刘硕鹏12123.44857.5278210.066137696835381.7154.718.7100
---|
5 | 1男刘运硕03143.49807.9474190.050209076039701.7566.421.7100
---|
6 | 1男吕晓瑶03143.54807.7576186.040117476037101.7353.918.0100
---|
7 | 1男米孜聪06364.03768.0674195.0603621055781.7883.126.280
---|
8 | 1男聂浩然27194.01767.7576220.0721580107238211.7566.521.7100
---|
9 | 1男牛苗嘉12114.12727.3885245.0851780117644231.6753.919.3100
---|
10 | 1男牛砚哲28134.00787.8276219.0721376117640311.7357.419.2100
---|
11 | 1男齐子涵185x4.13727.3785228.076970159543541.6354.620.6100
---|
12 | 1男乔一甲06163.45857.6678202.06276832022381.7961.119.1100
---|
13 | 1男任晓波03113.46857.6678245.08536276048111.7763.920.4100
---|
14 | 1男戎小龙26330.0000.0000.000500000.000.0NaN0
---|
15 | 1男桑淳熙06163.57787.6078192.05076854041471.7459.219.6100
---|
16 | 1男田晓龙24114.18708.1472210.06687043042411.7961.919.3100
---|
17 | 1男田玉聪27163.32957.2095255.0902295128053241.8363.418.9100
---|
18 | 1男王晨宇06133.56788.1572207.0641376128043631.7360.520.2100
---|
19 | 1男王家梁06303.47858.1572202.06213761610053641.7456.018.5100
---|
20 | 1男王乐天33313.53807.8576210.06636276034451.7756.918.2100
---|
21 | 1男王一钊12133.57787.8576220.07297021056701.7755.517.7100
---|
22 | 1男王子天06343.42857.2390212.0681274159557091.8572.321.1100
---|
23 | 1男王子鑫00124.03767.6878218.070158032047801.7783.726.760
---|
24 | 1男未晓锟12144.14728.3072206.06415801033581.7346.615.680
---|
25 | 1男张国瑞033x4.04768.1572205.06497054034941.6948.316.9100
---|
26 | 1男张皓天06324.04767.5578190.050127454032861.6950.117.5100
---|
27 | 1男张泽地03104.02767.5578240.080564128044831.7158.420.0100
---|
28 | 1男张智贤03183.57787.8976220.072970117642541.6654.819.9100
---|
29 | 1男赵博翰101x4.16708.1972212.0682710076034981.6968.023.880
---|
30 | 1男赵泽凯03114.01767.8976213.068564117643221.7455.918.5100
---|
31 | 1男赵泽宇06164.08748.2172208.06619852010039171.6651.918.8100
---|
32 | 1男左晶川12174.06748.7166206.064117443039701.7247.816.280
---|
34 | 2男贾和06334.22687.9774215.07097096838651.7558.719.2100
---|
35 | 2男李森06360.0000.0000.000500000.000.0NaN0
---|
36 | 2男李一帆18124.46508.7966172.0107681047501.7488.629.360
---|
37 | 2男李子阳06184.01767.3785210.06626076047141.8262.518.9100
---|
38 | 2男吕星繁03120.0000.0000.000500000.000.0NaN0
---|
39 | 2男赵凌云105x4.13727.7776208.06687076043271.7356.018.7100
---|
40 | 2男赵鹏悦26124.27668.8066185.040107254047451.6474.827.860
---|
42 | 3男宫诚博06123.43856.89100276.01001680128052121.8473.121.6100
---|
43 | 3男郭亚浩181X4.04767.2590240.080137686447561.7672.023.2100
---|
44 | 3男郝晓辰00133.38907.3685246.0852295117644331.8462.518.5100
---|
45 | 3男李国玺23104.19708.1772220.07218851044381.7472.223.880
---|
46 | 3男李一帆12184.08747.8076227.07615801060331.7785.627.360
---|
47 | 3男刘凡12184.09748.0674208.066107221041061.7068.723.880
---|
48 | 3男刘哲垚12174.09748.1672190.05026065042141.6760.721.8100
---|
49 | 3男米卓凡241X4.05768.1672200.062137696838571.7251.417.4100
---|
50 | 3男牛卓凡06144.02768.2772228.0761478128032661.6252.219.9100
---|
51 | 3男苏仕一12334.01768.5070215.07066696835781.6449.918.6100
# Statistical analysis
# Define the question, plot, and compare.
# Count the students per BMI score once and reuse it for both charts.
bmi_score_counts = data['BMI_score'].value_counts()

# Each chart gets its own figure: a Series plot otherwise draws on the
# current axes, so consecutive plots would be overlaid on one another.
plt.figure()
bmi_score_counts.plot(kind = 'pie',autopct = '%0.2f%%')

plt.figure()
bmi_score_counts.plot(kind = 'bar')

# Number of students at each 1000 m score.
plt.figure()
data.groupby(['男1000成绩'])['BMI_score'].count().plot(kind = 'bar')
评论列表(0条)