- 【Python数据分析与可视化】Numpy统计分析-实训
- 导包
- 数据读入
- 数据清洗
- 1.创建数据类型
- 2.创建二维数组
- 3.类型转化
- 4.数据排序
- 5.数据去重
- 6.对指定列求和、均值、标准差、方差、最小值和最大值
import numpy as np
import csv
数据读入
# Load the iris CSV. The first row holds the column headers; every
# following row is one observation, kept as a list of strings.
# The context manager guarantees the file handle is closed even if parsing
# fails, and newline="" is what the csv module asks for so that it can
# handle line endings inside quoted fields itself.
with open("iris.csv", encoding="gbk", newline="") as file:
    csv_reader = csv.reader(file)
    iris_header = next(csv_reader)
    iris_data = list(csv_reader)
print(iris_header)
print(iris_data[:10])
['序号', '萼片长', '萼片宽', '花瓣长', '花瓣宽', '种类']
[['1', '5.1', '3.5', '1.4', '0.2', 'setosa'], ['2', '4.9', '3', '1.4', '0.2', 'setosa'], ['3', '4.7', '3.2', '1.3', '0.2', 'setosa'], ['4', '4.6', '3.1', '1.5', '0.2', 'setosa'], ['5', '5', '3.6', '1.4', '0.2', 'setosa'], ['6', '5.4', '3.9', '1.7', '0.4', 'setosa'], ['7', '4.6', '3.4', '1.4', '0.3', 'setosa'], ['8', '5', '3.4', '1.5', '0.2', 'setosa'], ['9', '4.4', '2.9', '1.4', '0.2', 'setosa'], ['10', '4.9', '3.1', '1.5', '0.1', 'setosa']]
数据清洗
# Drop the leading row-number column from every record and store each
# record as a tuple (the form used below to fill a structured array).
iris_list = [tuple(row[1:]) for row in iris_data]
print(iris_list[:10])
[('5.1', '3.5', '1.4', '0.2', 'setosa'), ('4.9', '3', '1.4', '0.2', 'setosa'), ('4.7', '3.2', '1.3', '0.2', 'setosa'), ('4.6', '3.1', '1.5', '0.2', 'setosa'), ('5', '3.6', '1.4', '0.2', 'setosa'), ('5.4', '3.9', '1.7', '0.4', 'setosa'), ('4.6', '3.4', '1.4', '0.3', 'setosa'), ('5', '3.4', '1.5', '0.2', 'setosa'), ('4.4', '2.9', '1.4', '0.2', 'setosa'), ('4.9', '3.1', '1.5', '0.1', 'setosa')]
1.创建数据类型
# Structured dtype with one fixed-width (40 character) unicode field per
# CSV column that is kept: four measurements plus the species label.
datatype = np.dtype(
    [
        (field_name, np.str_, 40)
        for field_name in ("sepaL", "sepaW", "petalL", "petalW", "range")
    ]
)
print(datatype)
[('sepaL', '<U40'), ('sepaW', '<U40'), ('petalL', '<U40'), ('petalW', '<U40'), ('range', '<U40')]
2.创建二维数组
# Build a structured array from the cleaned records.
# Note: the result is one-dimensional (one element per record, e.g. shape
# (150,)); the columns are reached by field name, not by a second axis.
iris_data_arr = np.array(iris_list, dtype=datatype)
for preview in (iris_data_arr.shape, iris_data_arr[:10]):
    print(preview)
(150,)
[('5.1', '3.5', '1.4', '0.2', 'setosa')
('4.9', '3', '1.4', '0.2', 'setosa')
('4.7', '3.2', '1.3', '0.2', 'setosa')
('4.6', '3.1', '1.5', '0.2', 'setosa')
('5', '3.6', '1.4', '0.2', 'setosa')
('5.4', '3.9', '1.7', '0.4', 'setosa')
('4.6', '3.4', '1.4', '0.3', 'setosa')
('5', '3.4', '1.5', '0.2', 'setosa')
('4.4', '2.9', '1.4', '0.2', 'setosa')
('4.9', '3.1', '1.5', '0.1', 'setosa')]
3.类型转化
# Convert the sepal-length field from text to floating point.
sepal_length_text = iris_data_arr["sepaL"]
SepaLong = sepal_length_text.astype(float)
print(SepaLong[:10])

# Same conversion for the petal-length field.
petal_length_text = iris_data_arr["petalL"]
PetalLong = petal_length_text.astype(float)
print(PetalLong[:10])
[5.1 4.9 4.7 4.6 5. 5.4 4.6 5. 4.4 4.9]
[1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5]
4.数据排序
# Sort each column in place (ascending) and show its ten smallest values.
for column in (SepaLong, PetalLong):
    column.sort()
    print(column[:10])
[4.3 4.4 4.4 4.4 4.5 4.6 4.6 4.6 4.6 4.7]
[1. 1.1 1.2 1.2 1.3 1.3 1.3 1.3 1.3 1.3]
5.数据去重
# Remove duplicate sepal lengths, then print the distinct values and
# their sum.
unique_arr = np.unique(SepaLong)
print(unique_arr)
print(np.sum(unique_arr))
[4.3 4.4 4.5 4.6 4.7 4.8 4.9 5. 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6.
6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7. 7.1 7.2 7.3 7.4 7.6 7.7 7.9]
210.39999999999998
# Remove duplicate petal lengths, then print the distinct values and
# their sum.
unique_arr = np.unique(PetalLong)
print(unique_arr)
print(np.sum(unique_arr))
[1. 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.9 3. 3.3 3.5 3.6 3.7 3.8 3.9 4. 4.1
4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5. 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9
6. 6.1 6.3 6.4 6.6 6.7 6.9]
181.5
6.对指定列求和、均值、标准差、方差、最小值和最大值
# Print, in order: cumulative sum, mean, standard deviation, variance,
# minimum and maximum of the sepal lengths.
# np.std / np.var are called with their defaults (population form, ddof=0).
for statistic in (np.cumsum, np.mean, np.std, np.var, np.min, np.max):
    print(statistic(SepaLong))
[ 4.3 8.7 13.1 17.5 22. 26.6 31.2 35.8 40.4 45.1 49.8 54.6
59.4 64.2 69. 73.8 78.7 83.6 88.5 93.4 98.3 103.2 108.2 113.2
118.2 123.2 128.2 133.2 138.2 143.2 148.2 153.2 158.3 163.4 168.5 173.6
178.7 183.8 188.9 194. 199.1 204.3 209.5 214.7 219.9 225.2 230.6 236.
241.4 246.8 252.2 257.6 263.1 268.6 274.1 279.6 285.1 290.6 296.1 301.7
307.3 312.9 318.5 324.1 329.7 335.4 341.1 346.8 352.5 358.2 363.9 369.6
375.3 381.1 386.9 392.7 398.5 404.3 410.1 415.9 421.8 427.7 433.6 439.6
445.6 451.6 457.6 463.6 469.6 475.7 481.8 487.9 494. 500.1 506.2 512.4
518.6 524.8 531. 537.3 543.6 549.9 556.2 562.5 568.8 575.1 581.4 587.7
594.1 600.5 606.9 613.3 619.7 626.1 632.5 639. 645.5 652. 658.5 665.
671.6 678.2 684.9 691.6 698.3 705. 711.7 718.4 725.1 731.8 738.6 745.4
752.2 759.1 766. 772.9 779.8 786.8 793.9 801.1 808.3 815.5 822.8 830.2
837.8 845.5 853.2 860.9 868.6 876.5]
5.843333333333334
0.8253012917851409
0.6811222222222223
4.3
7.9
# Print, in order: cumulative sum, mean, standard deviation, variance,
# minimum and maximum of the petal lengths.
# np.std / np.var are called with their defaults (population form, ddof=0).
for statistic in (np.cumsum, np.mean, np.std, np.var, np.min, np.max):
    print(statistic(PetalLong))
[ 1. 2.1 3.3 4.5 5.8 7.1 8.4 9.7 11. 12.3 13.6 15.
16.4 17.8 19.2 20.6 22. 23.4 24.8 26.2 27.6 29. 30.4 31.8
33.3 34.8 36.3 37.8 39.3 40.8 42.3 43.8 45.3 46.8 48.3 49.8
51.3 52.9 54.5 56.1 57.7 59.3 60.9 62.5 64.2 65.9 67.6 69.3
71.2 73.1 76.1 79.4 82.7 86.2 89.7 93.3 97. 100.8 104.7 108.6
112.5 116.5 120.5 124.5 128.5 132.5 136.6 140.7 144.8 149. 153.2 157.4
161.6 165.9 170.2 174.6 179. 183.4 187.8 192.3 196.8 201.3 205.8 210.3
214.8 219.3 223.8 228.4 233. 237.6 242.3 247. 251.7 256.4 261.1 265.9
270.7 275.5 280.3 285.2 290.1 295. 299.9 304.8 309.8 314.8 319.8 324.8
329.9 335. 340.1 345.2 350.3 355.4 360.5 365.6 370.8 376. 381.3 386.6
392. 397.4 402.9 408.4 413.9 419.5 425.1 430.7 436.3 441.9 447.5 453.2
458.9 464.6 470.4 476.2 482. 487.9 493.8 499.8 505.8 511.9 518. 524.1
530.4 536.8 543.4 550.1 556.8 563.7]
3.7579999999999996
1.759404065775303
3.0955026666666665
1.0
6.9
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)