@[目录]
numpy用于大型多维数组的数值计算
# Section: defining NumPy arrays
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author: lyf
@file: Task001.py
@time: 2021/10/28
@desc: creating NumPy arrays and inspecting/converting their dtypes
"""
import numpy as np
import random

# Several ways to build an array: from a list, from a range, with arange.
t1 = np.array([1, 2, 3])
print(t1)
print(t1.dtype)

t2 = np.array(range(10))
print(t2)
print(t2.dtype)

t3 = np.arange(10)
print(t3)
print(t3.dtype)

# Specify the element type when the array is created.
t4 = np.array([1, 2, 3], dtype='i1')
print(t4)
print(t4.dtype)

t5 = np.array([1, 1, 0, 0, 1, 1, 0], dtype='bool')
print(t5)
print(t5.dtype)

# Convert an existing array to another dtype.
t6 = t5.astype(dtype='i1')
print(t6)
print(t6.dtype)

# Floating-point values in NumPy.
t7 = np.array([random.random() for _ in range(10)])
print(t7)
print(t7.dtype)

# Round every element to three decimal places.
t8 = np.round(t7, 3)
print(t8)

# Section: NumPy array shapes and arithmetic
import numpy as np

# Inspect an array's shape with .shape.
t1 = np.array([[1, 2, 3], [4, 5, 6]])
print('t1')
print(t1)
print(t1.shape)

print('*'*50)
# The nested lists below are ragged (their sub-lists differ in depth and
# length), so they cannot form a regular numeric array. dtype=object must be
# requested explicitly; NumPy >= 1.24 raises ValueError without it.
t2 = np.array([[[1, 2], [3, 4]], [5, 6], [[7, 8, 9], [10, 11]]], dtype=object)
print('t2')
print(t2)
print(t2.shape)

# Changing the shape of a 2-D array. Three approaches are shown:
#   reshape(1, n)                  -> still 2-D, with shape (1, n)
#   reshape(rows * cols,)          -> 1-D
#   flatten()                      -> 1-D copy
print('*'*50)
t3 = t1.reshape(1, 6)
print('t3')
print(t3)
print(t3.shape)

print('*'*50)
t4 = t1.reshape(t1.shape[0]*t1.shape[1],)
print('t4')
print(t4)
print(t4.shape)

print("*"*50)
t5 = t1.flatten()
print('t5')
print(t5)

# Arithmetic between an array and a scalar: the scalar is broadcast.
print("*"*50)
t6 = t1 + 2
print('t6')
print(t6)

# Arithmetic between two arrays of the same shape is element-wise.
print("*"*50)
t7 = np.arange(6).reshape(2, 3)
print('t7')
print(t7)
t8 = t7 + t1
print('t8')
print(t8)

# Arrays of different shapes: the (3,) array is broadcast across each row
# of the (2, 3) array.
print('*'*50)
t9 = np.arange(3)
print('t1')
print(t1)
print('t9')
print(t9)
t10 = t1 + t9
print('t10')
print(t10)

# Section: reading a local CSV data file with NumPy
np.loadtxt(fname,dtype=float,delimiter=None,skiprows=0,usecols=None,unpack=False)
import numpy as np

# Path of the CSV file to load.
uk_file_path = './gb_videos_data_numbers.csv'

print('*'*50)
t1 = np.loadtxt(uk_file_path, delimiter=',')
print('t1')
print(t1)

print('*'*50)
# unpack=True makes loadtxt return the transposed data.
t2 = np.loadtxt(uk_file_path, delimiter=',', unpack=True)
print('t2')
print(t2)

# Three ways to transpose in NumPy: t.transpose(), t.T, and
# t.swapaxes(1, 0), which exchanges the two axes.
print('*'*50)
t3 = t1.transpose()
print('t3')
print(t3)

t4 = t1.T
print('t4')
print(t4)

t5 = t1.swapaxes(1, 0)
print('t5')
print(t5)

# Section: indexing and slicing in NumPy
import numpy as np

# Path of the CSV file to load.
uk_file_path = './gb_videos_data_numbers.csv'
t1 = np.loadtxt(uk_file_path, delimiter=',')
print('t1')
print(t1)

# Selecting rows.
print('t1中的第三行')
print(t1[2])  # equivalent to print(t1[2, :])

print('t1中第三行及其以后的行')
print(t1[2:])  # equivalent to print(t1[2:, :])

print('t1中不连续的多行,第3,5,8行')
print(t1[[2, 4, 7]])  # equivalent to print(t1[[2, 4, 7], :])

# Selecting columns.
print('取t1中第一列')
print(t1[:, 0])

print('取连续多列')
print(t1[:, 3:])

print('取不连续的多列')
print(t1[:, [1, 3]])

# Selecting rows and columns together.
print('取第三行第四列')
print(t1[2, 3])

print('取连续的多行和多列')
print(t1[2:6, 1:3])  # the block where rows 2..5 cross columns 1..2

print('取不连续的多行和多列')
# Two index lists are paired element by element, so this picks three single
# values: (row 1, col 0), (row 7, col 2) and (row 3, col 0), all 0-based.
print(t1[[1, 7, 3], [0, 2, 0]])

# Section: modifying values in NumPy arrays
直接赋值即可
import numpy as np

t = np.array([[2, 3, 4], [3, 4, 6], [77, 8, 6], [2, 3, 4]])

print('更改整行的值')
t[1] = 1  # the scalar is assigned to every element of row 1
print(t)

print('更改指定大小的值')
t[t > 5] = 10  # boolean mask: only elements greater than 5 are replaced
print(t)

# np.where works like an element-wise if/else.
print('测试where的使用')
t1 = np.where(t < 3, 0, 99)
print(t1)

# t.clip(a, b): values below a become a, values above b become b.
t2 = t.clip(3, 9)
print(t2)

# NaN is a floating-point value, so the integer array has to be converted
# to float before NaN can be assigned to one of its elements.
t = t.astype(float)
t[2, 1] = np.nan
print(t)

# Section: NaN in NumPy
import numpy as np

t = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [2, 4, np.nan, 6], [np.nan, 8, 5, 3]])
t[1] = 0
print('t')
print(t)

print('统计数组中0的个数')
# count_nonzero(t) would count the entries that are NOT zero; compare with 0
# first so that the number printed matches the label (count of zeros).
zero_count = np.count_nonzero(t == 0)
print(zero_count)

print('统计数组中nan的个数')
nan_count = np.count_nonzero(np.isnan(t))
print(nan_count)

# axis=1 collapses the columns and yields one sum per row; axis=0 collapses
# the rows and yields one sum per column. Any sum that includes NaN is NaN.
print('行求和')
row_sums = np.sum(t, axis=1)
print(row_sums)

print('列求和')
col_sums = np.sum(t, axis=0)
print(col_sums)

# Section: sum, mean, median, max, min, ptp (peak-to-peak range, provided by
# NumPy), std (standard deviation)
import numpy as np


def fill_ndarray(t1):
    """Replace the NaN entries of each column with that column's mean.

    The mean is taken over the non-NaN entries of the column. The array is
    modified in place and also returned. A column with no NaN, or with
    nothing but NaN (no valid value to average), is left untouched.

    Args:
        t1: 2-D floating-point NumPy array.

    Returns:
        The same array object ``t1`` after filling.
    """
    for i in range(t1.shape[1]):
        column = t1[:, i]  # a view: writing to it writes into t1
        nan_mask = np.isnan(column)
        if not nan_mask.any() or nan_mask.all():
            continue
        column[nan_mask] = column[~nan_mask].mean()
    return t1


print('将数列中的nan替换为均值,或者中值')

if __name__ == '__main__':
    t1 = np.arange(12).reshape((3, 4)).astype(float)
    print('t1原始矩阵')
    print(t1)

    t1[1, 2:] = np.nan
    print('t1赋值nan后的矩阵')
    print(t1)  # show the matrix announced by the label above

    t2 = fill_ndarray(t1)
    print('t2')
    print(t2)

# Section: plotting a histogram
import numpy as np
from matplotlib import pyplot as plt

uk_file_path = './gb_videos_data_numbers.csv'
t_uk = np.loadtxt(uk_file_path, delimiter=',', dtype='int')

# Take the last column; the original author's note says it holds the
# comment counts.
t_uk_comments = t_uk[:, -1]
# Keep only the values that are at most 10000.
t_uk_comments = t_uk_comments[t_uk_comments <= 10000]

d = 500  # bin width
# Use at least one bin: the integer division yields 0 when the data range
# is narrower than one bin width.
bin_nums = max(1, (t_uk_comments.max() - t_uk_comments.min()) // d)

plt.figure(figsize=(20, 8), dpi=80)
plt.hist(t_uk_comments, bin_nums)
plt.show()

# Section: scatter plot
import numpy as np
from matplotlib import pyplot as plt

uk_file_path = './gb_videos_data_numbers.csv'
t_uk = np.loadtxt(uk_file_path, delimiter=',', dtype='int')

# Keep only the rows whose column-1 value is below 20000.
t_uk = t_uk[t_uk[:, 1] < 20000]

t_uk_comments = t_uk[:, -1]  # last column, plotted on the x axis
t_uk_like = t_uk[:, 1]       # column 1, plotted on the y axis

plt.scatter(t_uk_comments, t_uk_like)
plt.show()

# Section: concatenating NumPy arrays
np.vstack竖直拼接,np.hstack水平拼接。
import numpy as np

t1 = np.array([[1, 2, 3], [2, 3, 4]])
t2 = np.array([[2, 6, 4], [2, 9, 1]])
print('t1')
print(t1)
print('t2')
print(t2)

print('拼接')
t3 = np.vstack((t1, t2))  # stack vertically: t2's rows go below t1's
t4 = np.hstack((t1, t2))  # stack horizontally: t2's columns go to the right
print('t3')
print(t3)
print('t4')
print(t4)

print('行交换')
t5 = t2[[1, 0], :]  # rows taken in the order 1, 0
print(t5)

print('列交换')
# List every column index so that swapping columns 0 and 1 keeps column 2;
# indexing with [1, 0] alone would drop the third column.
t6 = t2[:, [1, 0, 2]]
print(t6)

# Section: generating arrays with NumPy
import numpy as np

t1 = np.ones((3, 2))
# The shape must be passed as one tuple; in np.zeros(4, 3) the 3 would be
# taken as the dtype argument and raise TypeError.
t2 = np.zeros((4, 3))

identity = np.eye(10)  # 10 x 10 identity matrix
print(identity)

# Index of the maximum along each row of t1, and of the minimum along each
# column of t2.
row_argmax = np.argmax(t1, axis=1)
print(row_argmax)
col_argmin = np.argmin(t2, axis=0)
print(col_argmin)

print('numpy生成随机数')
# np.random.rand draws from the uniform distribution on [0, 1);
# np.random.randn draws from the standard normal distribution.
uniform_samples = np.random.rand(2, 3)
print(uniform_samples)
normal_samples = np.random.randn(2, 3)
print(normal_samples)
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)