import pandas as pd

if __name__ == '__main__':
    # Build a Series straight from a plain Python list.
    course_names = ['语文', '数学', '英语', '计算机']
    print(pd.Series(course_names))
    # Printed output:
    # 0     语文
    # 1     数学
    # 2     英语
    # 3    计算机
    # dtype: object

# ---- 2. Dict to Series ----
import pandas as pd

# The dict keys become the Series index, the dict values become the data.
grades = {
    '语文': 80,
    '数学': 90,
    '英语': 100,
}
result = pd.Series(data=grades)
print(result)
# Printed output:
# 语文     80
# 数学     90
# 英语    100
# dtype: int64

# ---- 3. Series to list ----
import pandas as pd

if __name__ == '__main__':
    grade_series = pd.Series({'语文': 80, '数学': 90, '英语': 100})
    # tolist() drops the index and returns only the values.
    values = grade_series.tolist()
    print(values)
    # Printed output:
    # [80, 90, 100]

# ---- 4. Series to DataFrame ----
import pandas as pd

if __name__ == '__main__':
    grades = {'语文': 80, '数学': 90, '英语': 100}
    tmp = pd.Series(grades)
    # The class is spelled DataFrame (capital F); pd.Dataframe raises
    # AttributeError.
    result = pd.DataFrame(tmp, columns=['grade'])
    print(result)
    # Printed output:
    #     grade
    # 语文     80
    # 数学     90
    # 英语    100

# ---- 5. Create a Series with NumPy ----
import pandas as pd
import numpy as np

if __name__ == '__main__':
    # Nine values 10, 20, ..., 90 labelled 101..109 and stored as floats.
    values = np.arange(10, 100, 10)
    labels = np.arange(101, 110)
    series = pd.Series(values, index=labels, dtype='float')
    print(series)
101    10.0
102    20.0
103    30.0
104    40.0
105    50.0
106    60.0
107    70.0
108    80.0
109    90.0
dtype: float64

6.转换Series的数据类型
import pandas as pd

if __name__ == '__main__':
    codes = pd.Series(["001", "002", "003", "004"], index=["a", "b", "c", "d"])
    print(codes)

    # Two ways to turn the strings into integers:
    by_type = codes.astype(int)  # cast by naming the target type
    print(by_type)
    by_function = codes.map(int)  # cast by applying a function per element
    print(by_function)
a    001
b    002
c    003
d    004
dtype: object
a    1
b    2
c    3
d    4
dtype: int32
a    1
b    2
c    3
d    4
dtype: int64

7.添加新数据
import pandas as pd

if __name__ == '__main__':
    s = pd.Series(
        data={'语文': 99, '数学': 100}
    )
    extra = pd.Series(
        data={'英语': 150}
    )
    # Series.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported way to join Series end to end.
    s = pd.concat([s, extra])
    print(s)
语文     99
数学    100
英语    150
dtype: int64

8.reset index 转换为df
import pandas as pd

if __name__ == '__main__':
    scores = pd.Series({'语文': 99, '数学': 100})
    # reset_index() moves the labels into an ordinary column, so the result
    # is a two-column DataFrame rather than a Series.
    frame = scores.reset_index()
    frame.columns = ['project', 'grade']
    print(frame)
  project  grade
0      语文     99
1      数学    100

9.Dict创建df
import pandas as pd

if __name__ == '__main__':
    # Each dict key becomes a column; the lists supply the rows.
    # Note the spelling: pd.DataFrame (pd.Dataframe raises AttributeError).
    df = pd.DataFrame(
        data={
            '姓名': ['herio', 'xiaoo', 'gsda'],
            '性别': ['男', '女', '男'],
            '年龄': [18, 20, 19],
        }
    )
    print(df)
      姓名 性别  年龄
0  herio  男  18
1  xiaoo  女  20
2   gsda  男  19

10.df设置索引列
import pandas as pd

if __name__ == '__main__':
    # Note the spelling: pd.DataFrame (pd.Dataframe raises AttributeError).
    df = pd.DataFrame(
        data={
            '姓名': ['herio', 'xiaoo', 'gsda'],
            '性别': ['男', '女', '男'],
            '年龄': [18, 20, 19],
        }
    )
    # Promote the name column to the row index, modifying df in place.
    df.set_index('姓名', inplace=True)
    print(df)
      性别  年龄
姓名
herio  男  18
xiaoo  女  20
gsda   男  19

11.生成日期
import pandas as pd

if __name__ == '__main__':
    # Two equivalent ways to get every day of January 2022:
    # by explicit end date, or by number of periods.
    res = pd.date_range(start='2022-01-01', end='2022-01-31')
    res_1 = pd.date_range(start='2022-01-01', periods=31)
    # The separator must be a newline escape; the bare letter 'n' would be
    # printed literally between the two results.
    print(res, res_1, sep='\n')

# ---- All Mondays of a year (freq) ----
import pandas as pd

if __name__ == '__main__':
    # 'W-MON' is the weekly frequency anchored on Mondays.
    mondays = pd.date_range(
        start='2022-01-01',
        end='2022-12-31',
        freq='W-MON',
    )
    print(mondays)

# ---- 24 hourly timestamps for a single day ----
import pandas as pd

if __name__ == '__main__':
    # An offset object is used instead of the string alias 'H', which is
    # deprecated in recent pandas releases in favour of 'h'.
    hourly = pd.offsets.Hour()

    # Variant 1: fixed number of periods.
    res = pd.date_range(start='2022-01-01', periods=24, freq=hourly)
    # Variant 2: start/end with the right endpoint excluded.  The `closed`
    # keyword was removed from date_range in pandas 2.0; `inclusive` is its
    # replacement.
    res_1 = pd.date_range(
        start='2022-01-01',
        end='2022-01-02',
        inclusive='left',
        freq=hourly,
    )
    print(res)
    print(res_1)

# ---- Build a DataFrame from dates ----
import pandas as pd

if __name__ == '__main__':
    data = pd.date_range(start='2022-02-01', periods=31)
    # Note the spelling: pd.DataFrame (pd.Dataframe raises AttributeError).
    res = pd.DataFrame(data=data, columns=['day'])
    # The .dt accessor exposes datetime fields; day_of_year is the ordinal
    # day within the year (Feb 1 -> 32).
    res['day_of_year'] = res['day'].dt.day_of_year
    print(res)
          day  day_of_year
0  2022-02-01           32
1  2022-02-02           33
2  2022-02-03           34
.....
29 2022-03-02           61
30 2022-03-03           62

生成随机数据列df
import pandas as pd
import numpy as np

if __name__ == '__main__':
    n_rows = 1000
    year = pd.date_range(start='2022-01-01', periods=n_rows)
    data = {
        'normal': np.random.normal(loc=0, scale=1, size=n_rows),
        'uniform': np.random.uniform(low=0, high=1, size=n_rows),
        # size is required here: without it binomial() returns one scalar,
        # which pandas broadcasts to every row, making the column constant.
        'binomial': np.random.binomial(n=1, p=0.2, size=n_rows),
    }
    # Note the spelling: pd.DataFrame (pd.Dataframe raises AttributeError).
    df = pd.DataFrame(data=data, index=year)
    print(df)

# ---- Print the first 10 and last 5 rows ----
              normal   uniform  binomial
2022-01-01 -1.212357  0.561198         0
2022-01-02  1.455127  0.671026         0
2022-01-03  1.458189  0.922212         0
2022-01-04 -0.164604  0.948922         0
2022-01-05 -0.292973  0.602961         0
...              ...       ...       ...
2024-09-22 -0.350369  0.788879         0
2024-09-23 -0.716147  0.671242         0
2024-09-24 -0.345326  0.282493         0
2024-09-25  0.000214  0.735941         0
2024-09-26  0.072581  0.719543         0

打印前10行和后5行
# Show the first ten rows, a blank line, then the last five rows.
first_rows = df.head(10)
last_rows = df.tail(5)
print(first_rows, last_rows, sep='\n\n')

# ---- Basic descriptive information ----
# df.info() writes its summary itself and returns None, so printing its
# return value adds a literal "None" line after the summary.
info_result = df.info()
print(info_result)

# Summary statistics for the numeric columns.
stats = df.describe()
print(stats)
DatetimeIndex: 1000 entries, 2022-01-01 to 2024-09-26
Freq: D
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   normal    1000 non-null   float64
 1   uniform   1000 non-null   float64
 2   binomial  1000 non-null   int64
dtypes: float64(2), int64(1)
memory usage: 31.2 KB
None
            normal      uniform  binomial
count  1000.000000  1000.000000    1000.0
mean     -0.038351     0.513840       0.0
std       1.000126     0.289779       0.0
min      -3.250206     0.000009       0.0
25%      -0.732684     0.263531       0.0
50%      -0.091297     0.521737       0.0
75%       0.612340     0.773006       0.0
max       3.682969     0.997907       0.0

统计数据列的值出现的次数
# Count how many times each distinct value occurs in the column.
binomial_counts = df['binomial'].value_counts()
print(binomial_counts)

# ---- Save the first 50 rows to a CSV file ----
# Write the first 50 rows (index included) to a CSV file.
first_fifty = df.head(50)
first_fifty.to_csv("数据前50行.csv")

# ---- Read the CSV back into a DataFrame ----
import pandas as pd
import numpy as np

if __name__ == '__main__':
    # index_col=0 uses the first CSV column as the row index.
    df = pd.read_csv("数据前50行.csv", index_col=0)
    # info() prints its own summary and returns None, which is printed too.
    info_result = df.info()
    print(info_result)
    print(df.head(10))

# ---- 12. Stock data ----
索引列设置为普通列
# Turn the current index back into an ordinary column, modifying df in place.
df.reset_index(drop=False, inplace=True)

# ---- Add year and month columns ----
import pandas as pd

if __name__ == '__main__':
    df = pd.read_csv("00700.HK.csv")

    # Parse the date strings once, then derive the calendar fields from the
    # parsed values.
    parsed_dates = pd.to_datetime(df['Date'])
    df['Date'] = parsed_dates
    df['Year'] = parsed_dates.dt.year
    df['Month'] = parsed_dates.dt.month

    print(df.head(10))
        Date   Open   High    Low  Close    Volume  Year  Month
0 2021-09-30  456.0  464.6  453.8  461.4  17335451  2021      9
1 2021-09-29  461.6  465.0  450.2  465.0  18250450  2021      9
2 2021-09-28  467.0  476.2  464.6  469.8  20947276  2021      9
3 2021-09-27  459.0  473.0  455.2  464.6  17966998  2021      9
4 2021-09-24  461.4  473.4  456.2  460.2  16656914  2021      9
5 2021-09-23  460.2  469.6  456.4  463.2  22210868  2021      9
6 2021-09-21  446.0  453.8  443.2  450.0  16556875  2021      9
7 2021-09-20  456.6  457.4  448.0  454.2  15513224  2021      9
8 2021-09-17  445.8  467.6  445.2  461.8  23982628  2021      9
9 2021-09-16  446.8  454.8  445.0  451.0  24519868  2021      9

按年份分组对Close字段求平均值
# Average closing price per calendar year.
close_by_year = df.groupby('Year')['Close']
print(close_by_year.mean())

# ---- Minimum Close and the row it occurs in ----
import pandas as pd

if __name__ == '__main__':
    df = pd.read_csv("00700.HK.csv")
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month

    close = df['Close']
    lowest_pos = close.argmin()
    print(close.min())
    print(lowest_pos)
    # argmin() returns an integer *position*, so the matching row must be
    # fetched with the positional indexer .iloc.  Label-based .loc only
    # gives the same row when the index happens to be the default 0..n-1.
    # The double brackets keep the result a one-row DataFrame.
    print(df.iloc[[lowest_pos]])
3.375
4240
           Date  Open  High    Low  Close   Volume  Year  Month
4240 2004-07-26  3.45   3.5  3.375  3.375  7439000  2004      7

只处理需要的列
# Passing a list of names selects just those columns, in the given order.
wanted_columns = ['Year', 'Open', 'High']
print(df[wanted_columns].head(5))

# ---- Drop unneeded columns ----
# Remove the listed columns from df in place, then preview the result.
unwanted_columns = ['Low', 'High']
df.drop(columns=unwanted_columns, inplace=True)
print(df.head(5))
        Date   Open  Close    Volume  Year  Month
0 2021-09-30  456.0  461.4  17335451  2021      9
1 2021-09-29  461.6  465.0  18250450  2021      9
2 2021-09-28  467.0  469.8  20947276  2021      9
3 2021-09-27  459.0  464.6  17966998  2021      9
4 2021-09-24  461.4  460.2  16656914  2021      9

对列重命名
# Rename only the 'Date' column, in place, via an old-name -> new-name map.
# (Alternative: assign a complete list of new names to df.columns, e.g.
#  ['D', 'O', 'H', 'L', 'C', 'V', 'Y', 'M'].)
new_names = {'Date': 'D'}
df.rename(columns=new_names, inplace=True)
print(df.head(5))
D Open High Low Close Volume Year Month 0 2021-09-30 456.0 464.6 453.8 461.4 17335451 2021 9 1 2021-09-29 461.6 465.0 450.2 465.0 18250450 2021 9 2 2021-09-28 467.0 476.2 464.6 469.8 20947276 2021 9 3 2021-09-27 459.0 473.0 455.2 464.6 17966998 2021 9 4 2021-09-24 461.4 473.4 456.2 460.2 16656914 2021 9
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)