Pandas 学习笔记二

Pandas 学习笔记二,第1张

Pandas 学习笔记二

数据的读取与存储

csv格式
import pandas as pd
# 读取csv文件
data = pd.read_csv("stock_day.csv",usecols=['open','high','low','close'])
data.head()
             open   high  close    low
2018-02-27  23.53  25.88  24.16  23.53
2018-02-26  22.80  23.78  23.53  22.80
2018-02-23  22.88  23.37  22.82  22.71
2018-02-22  22.25  22.76  22.28  22.02
2018-02-14  21.49  21.99  21.92  21.48
data = pd.read_csv("stock_day2.csv", names=["open", "high", "close", "low", "volume", "price_change", "p_change", "ma5", "ma10", "ma20", "v_ma5", "v_ma10", "v_ma20", "turnover"])
data.head()
openhighcloselowvolumeprice_changep_changema5ma10ma20v_ma5v_ma10v_ma20turnover2018-02-2723.5325.8824.1623.5395578.030.632.6822.94222.14222.87553782.6446738.6555576.112.392018-02-2622.8023.7823.5322.8060985.110.693.0222.40621.95522.94240827.5242736.3456007.501.532018-02-2322.8823.3722.8222.7152914.010.542.4221.93821.92923.02235119.5841871.9756372.851.322018-02-2222.2522.7622.2822.0236105.010.361.6421.44621.90923.13735397.5839904.7860149.600.902018-02-1421.4921.9921.9221.4823331.040.442.0521.36621.92323.25333590.2142935.7461716.110.58
# 写入csv文件
# data.to_csv("1.csv")
# 保存'open'列的数据
data[:10].to_csv("test.csv", columns=["open"])
pd.read_csv("test.csv")
   Unnamed: 0   open
0  2018-02-27  23.53
1  2018-02-26  22.80
2  2018-02-23  22.88
3  2018-02-22  22.25
4  2018-02-14  21.49
5  2018-02-13  21.40
6  2018-02-12  20.70
7  2018-02-09  21.20
8  2018-02-08  21.79
9  2018-02-07  22.69
data[:10].to_csv("test.csv", columns=["open"], index=False, mode="a", header=False)
pd.read_csv("test.csv")
    Unnamed: 0   open
0   2018-02-27  23.53
1   2018-02-26  22.80
2   2018-02-23  22.88
3   2018-02-22  22.25
4   2018-02-14  21.49
5   2018-02-13  21.40
6   2018-02-12  20.70
7   2018-02-09  21.20
8   2018-02-08  21.79
9   2018-02-07  22.69
10       23.53    NaN
11        22.8    NaN
12       22.88    NaN
13       22.25    NaN
14       21.49    NaN
15        21.4    NaN
16        20.7    NaN
17        21.2    NaN
18       21.79    NaN
19       22.69    NaN

hdf5格式
# 读取hdf5文件
dayClose = pd.read_hdf("day_close.h5")
dayClose.head()
000001.SZ000002.SZ000004.SZ000005.SZ000006.SZ000007.SZ000008.SZ000009.SZ000010.SZ000011.SZ...001965.SZ603283.SH002920.SZ002921.SZ300684.SZ002922.SZ300735.SZ603329.SH603655.SH603080.SH016.3017.714.582.8814.602.624.964.665.376.02...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN117.0219.204.653.0215.972.654.954.705.376.27...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN217.0217.284.563.0614.372.634.824.475.375.96...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN316.1816.974.492.9513.102.734.894.335.375.77...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN416.9517.194.552.9913.182.774.974.425.375.92...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN

5 rows × 3562 columns

# 写入hdf5文件
dayClose.to_hdf("test.h5",key="close")
pd.read_hdf("test.h5",key="close").head()
000001.SZ000002.SZ000004.SZ000005.SZ000006.SZ000007.SZ000008.SZ000009.SZ000010.SZ000011.SZ...001965.SZ603283.SH002920.SZ002921.SZ300684.SZ002922.SZ300735.SZ603329.SH603655.SH603080.SH016.3017.714.582.8814.602.624.964.665.376.02...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN117.0219.204.653.0215.972.654.954.705.376.27...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN217.0217.284.563.0614.372.634.824.475.375.96...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN316.1816.974.492.9513.102.734.894.335.375.77...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN416.9517.194.552.9913.182.774.974.425.375.92...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN

5 rows × 3562 columns

dayOpen = pd.read_hdf("day_open.h5")
dayOpen.to_hdf("test.h5",key="open")
pd.read_hdf("test.h5",key="open").head()
000001.SZ000002.SZ000004.SZ000005.SZ000006.SZ000007.SZ000008.SZ000009.SZ000010.SZ000011.SZ...001965.SZ603283.SH002920.SZ002921.SZ300684.SZ002922.SZ300735.SZ603329.SH603655.SH603080.SH015.5016.154.262.7313.992.524.764.455.375.79...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN116.5017.944.532.9114.782.614.994.695.376.03...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN217.0018.804.633.0416.082.654.964.735.376.26...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN316.9516.594.523.0213.202.634.814.355.375.74...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN416.2016.964.502.9513.172.804.884.345.375.80...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN

5 rows × 3562 columns

JSON格式
# 读取JSON格式文件
sa = pd.read_json("Sarcasm_Headlines_Dataset.json", orient="records", lines=True)
sa.head()
article_linkheadlineis_sarcastic0https://www.huffingtonpost.com/entry/versace-b...former versace store clerk sues over secret 'b...01https://www.huffingtonpost.com/entry/roseanne-...the 'roseanne' revival catches up to our thorn...02https://local.theonion.com/mom-starting-to-fea...mom starting to fear son's web series closest ...13https://politics.theonion.com/boehner-just-wan...boehner just wants wife to listen, not come up...14https://www.huffingtonpost.com/entry/jk-rowlin...j.k. rowling wishes snape happy birthday in th...0
# 写入JSON格式文件
sa.to_json("test.json", orient="records", lines=True)
pd.read_json("test.json", orient="records", lines=True)
article_linkheadlineis_sarcastic0https://www.huffingtonpost.com/entry/versace-b...former versace store clerk sues over secret 'b...01https://www.huffingtonpost.com/entry/roseanne-...the 'roseanne' revival catches up to our thorn...02https://local.theonion.com/mom-starting-to-fea...mom starting to fear son's web series closest ...13https://politics.theonion.com/boehner-just-wan...boehner just wants wife to listen, not come up...14https://www.huffingtonpost.com/entry/jk-rowlin...j.k. rowling wishes snape happy birthday in th...0............26704https://www.huffingtonpost.com/entry/american-...american politics in moral free-fall026705https://www.huffingtonpost.com/entry/americas-...america's best 20 hikes026706https://www.huffingtonpost.com/entry/reparatio...reparations and obama026707https://www.huffingtonpost.com/entry/israeli-b...israeli ban targeting boycott supporters raise...026708https://www.huffingtonpost.com/entry/gourmet-g...gourmet gifts for the foodie 20140

26709 rows × 3 columns

Pandas高级处理 缺失值处理
movie = pd.read_csv("IMDB-Movie-Data.csv")
movie.head()
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)metascore01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.012PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.023SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.034SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.045Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0
import numpy as np
# 判断是否存在缺失值
# 如果缺失值不是nan而是其他符号则先替换为nan再进行判断处理
# data_new = data.replace(to_replace="?", value=np.nan)
np.any(pd.isnull(movie)) # 返回True说明有缺失值
True
np.all(pd.notnull(movie)) # 返回False说明有缺失值
False
pd.notnull(movie).all() # 找到有缺失值的字段
Rank                   True
Title                  True
Genre                  True
Description            True
Director               True
Actors                 True
Year                   True
Runtime (Minutes)      True
Rating                 True
Votes                  True
Revenue (Millions)    False
metascore             False
dtype: bool
# 删除有缺失值的样本
data1 = movie.dropna()
pd.notnull(data1).all()
Rank                  True
Title                 True
Genre                 True
Description           True
Director              True
Actors                True
Year                  True
Runtime (Minutes)     True
Rating                True
Votes                 True
Revenue (Millions)    True
metascore             True
dtype: bool
# 替换有缺失值的样本
# 注意:对选出的列使用 inplace=True 在新版 pandas(写时复制)中不会修改 movie 本身,因此改为显式赋值
movie["Revenue (Millions)"] = movie["Revenue (Millions)"].fillna(movie["Revenue (Millions)"].mean())
movie["metascore"] = movie["metascore"].fillna(movie["metascore"].mean())
pd.notnull(movie).all() # 缺失值已经处理完毕
Rank                  True
Title                 True
Genre                 True
Description           True
Director              True
Actors                True
Year                  True
Runtime (Minutes)     True
Rating                True
Votes                 True
Revenue (Millions)    True
metascore             True
dtype: bool
数据离散化
data = pd.read_csv("stock_day.csv")
p_change = data["p_change"]
# 自动分组
sr = pd.qcut(p_change,10)
sr.value_counts()
(-10.030999999999999, -4.836]    65
(-0.462, 0.26]                   65
(0.26, 0.94]                     65
(5.27, 10.03]                    65
(-4.836, -2.444]                 64
(-2.444, -1.352]                 64
(-1.352, -0.462]                 64
(1.738, 2.938]                   64
(2.938, 5.27]                    64
(0.94, 1.738]                    63
Name: p_change, dtype: int64
pd.get_dummies(sr, prefix="涨跌幅").head()
涨跌幅_(-10.030999999999999, -4.836]涨跌幅_(-4.836, -2.444]涨跌幅_(-2.444, -1.352]涨跌幅_(-1.352, -0.462]涨跌幅_(-0.462, 0.26]涨跌幅_(0.26, 0.94]涨跌幅_(0.94, 1.738]涨跌幅_(1.738, 2.938]涨跌幅_(2.938, 5.27]涨跌幅_(5.27, 10.03]2018-02-2700000001002018-02-2600000000102018-02-2300000001002018-02-2200000010002018-02-140000000100
# 自定义分组
bins = [-100, -7, -5, -3, 0, 3, 5, 7, 100]
sr = pd.cut(p_change, bins)
sr.value_counts()
(0, 3]        215
(-3, 0]       188
(3, 5]         57
(-5, -3]       51
(5, 7]         35
(7, 100]       35
(-100, -7]     34
(-7, -5]       28
Name: p_change, dtype: int64
data2 = pd.get_dummies(sr, prefix="rise")
data2.head()
            rise_(-100, -7]  rise_(-7, -5]  rise_(-5, -3]  rise_(-3, 0]  rise_(0, 3]  rise_(3, 5]  rise_(5, 7]  rise_(7, 100]
2018-02-27                0              0              0             0            1            0            0              0
2018-02-26                0              0              0             0            0            1            0              0
2018-02-23                0              0              0             0            1            0            0              0
2018-02-22                0              0              0             0            1            0            0              0
2018-02-14                0              0              0             0            1            0            0              0

合并
# 按方向拼接 pd.concat([data1, data2], axis=0)(待拼接的对象需放在列表中;axis=0 按行纵向拼接,axis=1 按列横向拼接)
pd.concat([data, data2], axis=1).head()
openhighcloselowvolumeprice_changep_changema5ma10ma20...v_ma20turnoverrise_(-100, -7]rise_(-7, -5]rise_(-5, -3]rise_(-3, 0]rise_(0, 3]rise_(3, 5]rise_(5, 7]rise_(7, 100]2018-02-2723.5325.8824.1623.5395578.030.632.6822.94222.14222.875...55576.112.39000010002018-02-2622.8023.7823.5322.8060985.110.693.0222.40621.95522.942...56007.501.53000001002018-02-2322.8823.3722.8222.7152914.010.542.4221.93821.92923.022...56372.851.32000010002018-02-2222.2522.7622.2822.0236105.010.361.6421.44621.90923.137...60149.600.90000010002018-02-1421.4921.9921.9221.4823331.040.442.0521.36621.92323.253...61716.110.5800001000

5 rows × 22 columns

pd.concat([data, data2], axis=0).head()
openhighcloselowvolumeprice_changep_changema5ma10ma20...v_ma20turnoverrise_(-100, -7]rise_(-7, -5]rise_(-5, -3]rise_(-3, 0]rise_(0, 3]rise_(3, 5]rise_(5, 7]rise_(7, 100]2018-02-2723.5325.8824.1623.5395578.030.632.6822.94222.14222.875...55576.112.39NaNNaNNaNNaNNaNNaNNaNNaN2018-02-2622.8023.7823.5322.8060985.110.693.0222.40621.95522.942...56007.501.53NaNNaNNaNNaNNaNNaNNaNNaN2018-02-2322.8823.3722.8222.7152914.010.542.4221.93821.92923.022...56372.851.32NaNNaNNaNNaNNaNNaNNaNNaN2018-02-2222.2522.7622.2822.0236105.010.361.6421.44621.90923.137...60149.600.90NaNNaNNaNNaNNaNNaNNaNNaN2018-02-1421.4921.9921.9221.4823331.040.442.0521.36621.92323.253...61716.110.58NaNNaNNaNNaNNaNNaNNaNNaN

5 rows × 22 columns

pd.concat([data, data2], axis=0).tail()
openhighcloselowvolumeprice_changep_changema5ma10ma20...v_ma20turnoverrise_(-100, -7]rise_(-7, -5]rise_(-5, -3]rise_(-3, 0]rise_(0, 3]rise_(3, 5]rise_(5, 7]rise_(7, 100]2015-03-06NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN0.00.00.00.00.00.00.01.02015-03-05NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN0.00.00.00.01.00.00.00.02015-03-04NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN0.00.00.00.01.00.00.00.02015-03-03NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN0.00.00.00.01.00.00.00.02015-03-02NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN0.00.00.00.01.00.00.00.0

5 rows × 22 columns

left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                        'key2': ['K0', 'K1', 'K0', 'K1'],
                        'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3']})

right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                        'key2': ['K0', 'K0', 'K0', 'K0'],
                        'C': ['C0', 'C1', 'C2', 'C3'],
                        'D': ['D0', 'D1', 'D2', 'D3']})
left
  key1 key2   A   B
0   K0   K0  A0  B0
1   K0   K1  A1  B1
2   K1   K0  A2  B2
3   K2   K1  A3  B3
right
  key1 key2   C   D
0   K0   K0  C0  D0
1   K1   K0  C1  D1
2   K1   K0  C2  D2
3   K2   K0  C3  D3
pd.merge(left, right, how="inner", on=["key1", "key2"])
  key1 key2   A   B   C   D
0   K0   K0  A0  B0  C0  D0
1   K1   K0  A2  B2  C1  D1
2   K1   K0  A2  B2  C2  D2
pd.merge(left, right, how="left", on=["key1", "key2"])
  key1 key2   A   B    C    D
0   K0   K0  A0  B0   C0   D0
1   K0   K1  A1  B1  NaN  NaN
2   K1   K0  A2  B2   C1   D1
3   K1   K0  A2  B2   C2   D2
4   K2   K1  A3  B3  NaN  NaN
pd.merge(left, right, how="right", on=["key1", "key2"])
  key1 key2    A    B   C   D
0   K0   K0   A0   B0  C0  D0
1   K1   K0   A2   B2  C1  D1
2   K1   K0   A2   B2  C2  D2
3   K2   K0  NaN  NaN  C3  D3
pd.merge(left, right, how="outer", on=["key1", "key2"])
  key1 key2    A    B    C    D
0   K0   K0   A0   B0   C0   D0
1   K0   K1   A1   B1  NaN  NaN
2   K1   K0   A2   B2   C1   D1
3   K1   K0   A2   B2   C2   D2
4   K2   K1   A3   B3  NaN  NaN
5   K2   K0  NaN  NaN   C3   D3
# pd.crosstab(星期数据列, 涨跌幅数据列)
# 准备星期数据列
data.index
Index(['2018-02-27', '2018-02-26', '2018-02-23', '2018-02-22', '2018-02-14',
       '2018-02-13', '2018-02-12', '2018-02-09', '2018-02-08', '2018-02-07',
       ...
       '2015-03-13', '2015-03-12', '2015-03-11', '2015-03-10', '2015-03-09',
       '2015-03-06', '2015-03-05', '2015-03-04', '2015-03-03', '2015-03-02'],
      dtype='object', length=643)
# pandas日期类型
date = pd.to_datetime(data.index)
date
DatetimeIndex(['2018-02-27', '2018-02-26', '2018-02-23', '2018-02-22',
               '2018-02-14', '2018-02-13', '2018-02-12', '2018-02-09',
               '2018-02-08', '2018-02-07',
               ...
               '2015-03-13', '2015-03-12', '2015-03-11', '2015-03-10',
               '2015-03-09', '2015-03-06', '2015-03-05', '2015-03-04',
               '2015-03-03', '2015-03-02'],
              dtype='datetime64[ns]', length=643, freq=None)
data["week"] = date.weekday
date.weekday
Int64Index([1, 0, 4, 3, 2, 1, 0, 4, 3, 2,
            ...
            4, 3, 2, 1, 0, 4, 3, 2, 1, 0],
           dtype='int64', length=643)
data.head()
openhighcloselowvolumeprice_changep_changema5ma10ma20v_ma5v_ma10v_ma20turnoverweek2018-02-2723.5325.8824.1623.5395578.030.632.6822.94222.14222.87553782.6446738.6555576.112.3912018-02-2622.8023.7823.5322.8060985.110.693.0222.40621.95522.94240827.5242736.3456007.501.5302018-02-2322.8823.3722.8222.7152914.010.542.4221.93821.92923.02235119.5841871.9756372.851.3242018-02-2222.2522.7622.2822.0236105.010.361.6421.44621.90923.13735397.5839904.7860149.600.9032018-02-1421.4921.9921.9221.4823331.040.442.0521.36621.92323.25333590.2142935.7461716.110.582
# 准备涨跌幅数据列
data["pona"] = np.where(data["p_change"] > 0, 1, 0)
data.head()
             open   high  close    low    volume  price_change  p_change     ma5    ma10    ma20     v_ma5    v_ma10    v_ma20  turnover  week  pona
2018-02-27  23.53  25.88  24.16  23.53  95578.03          0.63      2.68  22.942  22.142  22.875  53782.64  46738.65  55576.11      2.39     1     1
2018-02-26  22.80  23.78  23.53  22.80  60985.11          0.69      3.02  22.406  21.955  22.942  40827.52  42736.34  56007.50      1.53     0     1
2018-02-23  22.88  23.37  22.82  22.71  52914.01          0.54      2.42  21.938  21.929  23.022  35119.58  41871.97  56372.85      1.32     4     1
2018-02-22  22.25  22.76  22.28  22.02  36105.01          0.36      1.64  21.446  21.909  23.137  35397.58  39904.78  60149.60      0.90     3     1
2018-02-14  21.49  21.99  21.92  21.48  23331.04          0.44      2.05  21.366  21.923  23.253  33590.21  42935.74  61716.11      0.58     2     1

交叉表和透视表
# 交叉表
dataTable = pd.crosstab(data["week"], data["pona"])
dataTable
pona   0   1
week
0     63  62
1     55  76
2     61  71
3     63  65
4     59  68
dataTable.sum(axis=1)
week
0    125
1    131
2    132
3    128
4    127
dtype: int64
dataTable.div(dataTable.sum(axis=1), axis=0).plot(kind="bar", stacked=True)
 



dataTable.div(dataTable.sum(axis=1), axis=0)
pona         0         1
week
0     0.504000  0.496000
1     0.419847  0.580153
2     0.462121  0.537879
3     0.492188  0.507812
4     0.464567  0.535433
# 透视表
data.pivot_table(["pona"], index=["week"])
          pona
week
0     0.496000
1     0.580153
2     0.537879
3     0.507812
4     0.535433

分组与聚合
col = pd.DataFrame({'color': ['white','red','green','red','green'], 'object': ['pen','pencil','pencil','ashtray','pen'],'price1':[5.56,4.20,1.30,0.56,2.75],'price2':[4.75,4.12,1.60,0.75,3.15]})
col
   color   object  price1  price2
0  white      pen    5.56    4.75
1    red   pencil    4.20    4.12
2  green   pencil    1.30    1.60
3    red  ashtray    0.56    0.75
4  green      pen    2.75    3.15
# 进行分组,对颜色分组,price1进行聚合
# 用dataframe的方法进行分组
col.groupby(by="color")["price1"].max()
color
green    2.75
red      4.20
white    5.56
Name: price1, dtype: float64
col["price1"].groupby(col["color"]).max()
color
green    2.75
red      4.20
white    5.56
Name: price1, dtype: float64
实战案例 电影数据分析练习
数据文件:IMDB-Movie-Data.csv
问题1:我们想知道这些电影数据中评分的平均分,导演的人数等信息,我们应该怎么获取?
问题2:对于这一组电影数据,如果我们想了解rating,runtime的分布情况,应该如何呈现数据?
问题3:对于这一组电影数据,如果我们希望统计电影分类(genre)的情况,应该如何处理数据?
# 1、准备数据
movie = pd.read_csv("IMDB-Movie-Data.csv")
movie
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)metascore01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.012PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.023SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.034SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.045Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0.......................................995996Secret in Their EyesCrime,Drama,MysteryA tight-knit team of rising investigators, alo...Billy RayChiwetel Ejiofor, Nicole Kidman, Julia Roberts...20151116.227585NaN45.0996997Hostel: Part IIHorrorThree American college students studying abroa...Eli RothLauren German, Heather Matarazzo, Bijou Philli...2007945.57315217.5446.0997998Step Up 2: The StreetsDrama,Music,RomanceRomantic sparks occur between two dance studen...Jon M. ChuRobert Hoffman, Briana Evigan, Cassie Ventura,...2008986.27069958.0150.0998999Search PartyAdventure,ComedyA pair of friends embark on a mission to reuni...Scot ArmstrongAdam Pally, T.J. Miller, Thomas Middleditch,Sh...2014935.64881NaN22.09991000Nine LivesComedy,Family,FantasyA stuffy businessman finds himself trapped ins...Barry SonnenfeldKevin Spacey, Jennifer Garner, Robbie Amell,Ch...2016875.31243519.6411.0

1000 rows × 12 columns

# 问题1:我们想知道这些电影数据中评分的平均分,导演的人数等信息,我们应该怎么获取?
# 评分的平均分
movie["Rating"].mean()
6.723199999999999
# 导演的人数
np.unique(movie["Director"]).size
644
# 问题2:对于这一组电影数据,如果我们想了解rating,runtime的分布情况,应该如何呈现数据?
movie["Rating"].plot(kind="hist", figsize=(20, 8))
 



import matplotlib.pyplot as plt
# 1、创建画布
plt.figure(figsize=(20, 8), dpi=80)

# 2、绘制直方图
plt.hist(movie["Rating"], 20)

# 修改刻度
plt.xticks(np.linspace(movie["Rating"].min(), movie["Rating"].max(), 21))

# 添加网格
plt.grid(linestyle="--", alpha=0.5)

# 3、显示图像
plt.show()



# 问题3:对于这一组电影数据,如果我们希望统计电影分类(genre)的情况,应该如何处理数据?
# 先统计电影类别都有哪些
movie_genre = [i.split(",") for i in movie["Genre"]]
movie_genre
[['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Mystery', 'Sci-Fi'],
 ['Horror', 'Thriller'],
 ['Animation', 'Comedy', 'Family'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Comedy', 'Drama', 'Music'],
 ['Comedy'],
 ['Action', 'Adventure', 'Biography'],
 ['Adventure', 'Drama', 'Romance'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Comedy', 'Drama'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Thriller'],
 ['Biography', 'Drama'],
 ['Drama', 'Mystery', 'Sci-Fi'],
 ['Adventure', 'Drama', 'Thriller'],
 ['Drama'],
 ['Crime', 'Drama', 'Horror'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Comedy'],
 ['Action', 'Adventure', 'Drama'],
 ['Horror', 'Thriller'],
 ['Comedy'],
 ['Action', 'Adventure', 'Drama'],
 ['Comedy'],
 ['Drama', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Comedy'],
 ['Action', 'Horror', 'Sci-Fi'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Drama', 'Sci-Fi'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Adventure', 'Western'],
 ['Comedy', 'Drama'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Drama'],
 ['Horror'],
 ['Biography', 'Drama', 'History'],
 ['Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Drama', 'Thriller'],
 ['Adventure', 'Drama', 'Fantasy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Comedy', 'Drama'],
 ['Action', 'Crime', 'Thriller'],
 ['Action', 'Crime', 'Drama'],
 ['Adventure', 'Drama', 'History'],
 ['Crime', 'Horror', 'Thriller'],
 ['Drama', 'Romance'],
 ['Comedy', 'Drama', 'Romance'],
 ['Biography', 'Drama'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Crime', 'Drama', 'Mystery'],
 ['Drama', 'Romance', 'Thriller'],
 ['Drama', 'Mystery', 'Sci-Fi'],
 ['Action', 'Adventure', 'Comedy'],
 ['Drama', 'History', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Drama'],
 ['Action', 'Drama', 'Thriller'],
 ['Drama', 'History'],
 ['Action', 'Drama', 'Romance'],
 ['Drama', 'Fantasy'],
 ['Drama', 'Romance'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Sci-Fi'],
 ['Adventure', 'Drama', 'War'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Comedy', 'Fantasy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Comedy', 'Drama'],
 ['Biography', 'Comedy', 'Crime'],
 ['Crime', 'Drama', 'Mystery'],
 ['Action', 'Crime', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Crime', 'Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Action', 'Crime', 'Drama'],
 ['Crime', 'Drama', 'Mystery'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Drama'],
 ['Comedy', 'Crime', 'Drama'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Comedy', 'Crime'],
 ['Animation', 'Drama', 'Fantasy'],
 ['Horror', 'Mystery', 'Sci-Fi'],
 ['Drama', 'Mystery', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Biography', 'Crime', 'Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Adventure', 'Drama', 'Sci-Fi'],
 ['Crime', 'Mystery', 'Thriller'],
 ['Action', 'Adventure', 'Comedy'],
 ['Crime', 'Drama', 'Thriller'],
 ['Comedy'],
 ['Action', 'Adventure', 'Drama'],
 ['Drama'],
 ['Drama', 'Mystery', 'Sci-Fi'],
 ['Action', 'Horror', 'Thriller'],
 ['Biography', 'Drama', 'History'],
 ['Romance', 'Sci-Fi'],
 ['Action', 'Fantasy', 'War'],
 ['Adventure', 'Drama', 'Fantasy'],
 ['Comedy'],
 ['Horror', 'Thriller'],
 ['Action', 'Biography', 'Drama'],
 ['Drama', 'Horror', 'Mystery'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Adventure', 'Drama', 'Family'],
 ['Adventure', 'Mystery', 'Sci-Fi'],
 ['Adventure', 'Comedy', 'Romance'],
 ['Action'],
 ['Action', 'Thriller'],
 ['Adventure', 'Drama', 'Family'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Crime', 'Mystery'],
 ['Comedy', 'Family', 'Musical'],
 ['Adventure', 'Drama', 'Thriller'],
 ['Drama'],
 ['Adventure', 'Comedy', 'Drama'],
 ['Drama', 'Horror', 'Thriller'],
 ['Drama', 'Music'],
 ['Action', 'Crime', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Drama', 'Romance'],
 ['Mystery', 'Thriller'],
 ['Mystery', 'Thriller', 'Western'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Comedy', 'Family'],
 ['Biography', 'Comedy', 'Drama'],
 ['Drama'],
 ['Drama', 'Western'],
 ['Drama', 'Mystery', 'Romance'],
 ['Comedy', 'Drama'],
 ['Action', 'Drama', 'Mystery'],
 ['Comedy'],
 ['Action', 'Adventure', 'Crime'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Adventure', 'Sci-Fi', 'Thriller'],
 ['Drama'],
 ['Action', 'Crime', 'Drama'],
 ['Drama', 'Horror', 'Mystery'],
 ['Action', 'Horror', 'Sci-Fi'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Comedy', 'Fantasy'],
 ['Action', 'Comedy', 'Mystery'],
 ['Thriller', 'War'],
 ['Action', 'Comedy', 'Crime'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Crime'],
 ['Action', 'Adventure', 'Thriller'],
 ['Drama', 'Fantasy', 'Romance'],
 ['Action', 'Adventure', 'Comedy'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Drama', 'History'],
 ['Action', 'Adventure', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Animation', 'Adventure', 'Family'],
 ['Adventure', 'Horror'],
 ['Drama', 'Romance', 'Sci-Fi'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Adventure', 'Family'],
 ['Action', 'Adventure', 'Drama'],
 ['Action', 'Comedy'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Action', 'Adventure', 'Comedy'],
 ['Comedy', 'Romance'],
 ['Horror', 'Mystery'],
 ['Drama', 'Family', 'Fantasy'],
 ['Sci-Fi'],
 ['Drama', 'Thriller'],
 ['Drama', 'Romance'],
 ['Drama', 'War'],
 ['Drama', 'Fantasy', 'Horror'],
 ['Crime', 'Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Drama', 'Romance'],
 ['Drama'],
 ['Crime', 'Drama', 'History'],
 ['Horror', 'Sci-Fi', 'Thriller'],
 ['Action', 'Drama', 'Sport'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Crime', 'Drama', 'Thriller'],
 ['Adventure', 'Biography', 'Drama'],
 ['Biography', 'Drama', 'Thriller'],
 ['Action', 'Comedy', 'Crime'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Drama', 'Fantasy', 'Horror'],
 ['Biography', 'Drama', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Mystery'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Drama', 'Horror'],
 ['Comedy', 'Drama', 'Romance'],
 ['Comedy', 'Romance'],
 ['Drama', 'Horror', 'Thriller'],
 ['Action', 'Adventure', 'Drama'],
 ['Drama'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Drama', 'Mystery'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Comedy'],
 ['Drama', 'Horror'],
 ['Action', 'Comedy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Horror', 'Mystery'],
 ['Crime', 'Drama', 'Mystery'],
 ['Comedy', 'Crime'],
 ['Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Family'],
 ['Horror', 'Sci-Fi', 'Thriller'],
 ['Drama', 'Fantasy', 'War'],
 ['Crime', 'Drama', 'Thriller'],
 ['Action', 'Adventure', 'Drama'],
 ['Action', 'Adventure', 'Thriller'],
 ['Action', 'Adventure', 'Drama'],
 ['Drama', 'Romance'],
 ['Biography', 'Drama', 'History'],
 ['Drama', 'Horror', 'Thriller'],
 ['Adventure', 'Comedy', 'Drama'],
 ['Action', 'Adventure', 'Romance'],
 ['Action', 'Drama', 'War'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Drama', 'Musical', 'Romance'],
 ['Drama', 'Sci-Fi', 'Thriller'],
 ['Comedy', 'Drama'],
 ['Action', 'Comedy', 'Crime'],
 ['Biography', 'Comedy', 'Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Drama', 'Thriller'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Comedy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Drama', 'Sci-Fi'],
 ['Horror'],
 ['Drama', 'Thriller'],
 ['Comedy', 'Drama', 'Romance'],
 ['Drama', 'Thriller'],
 ['Comedy', 'Drama'],
 ['Drama'],
 ['Action', 'Adventure', 'Comedy'],
 ['Drama', 'Horror', 'Thriller'],
 ['Comedy'],
 ['Drama', 'Sci-Fi'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Horror'],
 ['Action', 'Adventure', 'Thriller'],
 ['Adventure', 'Fantasy'],
 ['Action', 'Comedy', 'Crime'],
 ['Comedy', 'Drama', 'Music'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Adventure', 'Mystery'],
 ['Action', 'Comedy', 'Crime'],
 ['Crime', 'Drama', 'History'],
 ['Comedy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Crime', 'Mystery', 'Thriller'],
 ['Action', 'Adventure', 'Crime'],
 ['Thriller'],
 ['Biography', 'Drama', 'Romance'],
 ['Action', 'Adventure'],
 ['Action', 'Fantasy'],
 ['Action', 'Comedy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Comedy', 'Crime'],
 ['Thriller'],
 ['Action', 'Drama', 'Horror'],
 ['Comedy', 'Music', 'Romance'],
 ['Comedy'],
 ['Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Drama', 'Romance'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Comedy', 'Drama'],
 ['Biography', 'Crime', 'Drama'],
 ['Drama', 'History'],
 ['Action', 'Crime', 'Thriller'],
 ['Action', 'Biography', 'Drama'],
 ['Horror'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Crime', 'Drama'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Crime', 'Drama', 'Thriller'],
 ['Action', 'Crime', 'Thriller'],
 ['Comedy', 'Romance'],
 ['Biography', 'Drama', 'Sport'],
 ['Drama', 'Romance'],
 ['Drama', 'Horror'],
 ['Adventure', 'Fantasy'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Action', 'Drama', 'Sci-Fi'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Horror'],
 ['Comedy', 'Horror', 'Thriller'],
 ['Action', 'Crime', 'Thriller'],
 ['Crime', 'Drama', 'Music'],
 ['Drama'],
 ['Action', 'Crime', 'Thriller'],
 ['Action', 'Sci-Fi', 'Thriller'],
 ['Biography', 'Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Drama', 'Horror', 'Sci-Fi'],
 ['Biography', 'Comedy', 'Drama'],
 ['Crime', 'Horror', 'Thriller'],
 ['Crime', 'Drama', 'Mystery'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Biography', 'Drama'],
 ['Biography', 'Drama'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Biography', 'Drama'],
 ['Drama', 'Fantasy', 'Horror'],
 ['Comedy', 'Drama', 'Romance'],
 ['Drama', 'Sport'],
 ['Drama', 'Romance'],
 ['Comedy', 'Romance'],
 ['Action', 'Crime', 'Thriller'],
 ['Action', 'Crime', 'Drama'],
 ['Action', 'Drama', 'Thriller'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Action', 'Adventure'],
 ['Action', 'Adventure', 'Romance'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Crime', 'Drama'],
 ['Comedy', 'Horror'],
 ['Comedy', 'Fantasy', 'Romance'],
 ['Drama'],
 ['Drama'],
 ['Comedy', 'Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Adventure', 'Sci-Fi', 'Thriller'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Comedy', 'Drama'],
 ['Biography', 'Drama', 'Romance'],
 ['Comedy', 'Fantasy'],
 ['Comedy', 'Drama', 'Fantasy'],
 ['Comedy'],
 ['Horror', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Comedy', 'Horror'],
 ['Comedy', 'Mystery'],
 ['Drama'],
 ['Adventure', 'Drama', 'Fantasy'],
 ['Drama', 'Sport'],
 ['Action', 'Adventure'],
 ['Action', 'Adventure', 'Drama'],
 ['Action', 'Drama', 'Sci-Fi'],
 ['Action', 'Mystery', 'Sci-Fi'],
 ['Action', 'Crime', 'Drama'],
 ['Action', 'Crime', 'Fantasy'],
 ['Biography', 'Comedy', 'Drama'],
 ['Action', 'Crime', 'Thriller'],
 ['Biography', 'Crime', 'Drama'],
 ['Drama', 'Sport'],
 ['Adventure', 'Comedy', 'Drama'],
 ['Action', 'Adventure', 'Thriller'],
 ['Comedy', 'Fantasy', 'Horror'],
 ['Drama', 'Sport'],
 ['Horror', 'Thriller'],
 ['Drama', 'History', 'Thriller'],
 ['Animation', 'Action', 'Adventure'],
 ['Action', 'Adventure', 'Drama'],
 ['Action', 'Comedy', 'Family'],
 ['Action', 'Adventure', 'Drama'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Comedy'],
 ['Action', 'Crime', 'Drama'],
 ['Biography', 'Drama'],
 ['Comedy', 'Romance'],
 ['Comedy'],
 ['Drama', 'Fantasy', 'Romance'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Comedy'],
 ['Comedy', 'Sci-Fi'],
 ['Comedy', 'Drama'],
 ['Animation', 'Action', 'Adventure'],
 ['Horror'],
 ['Action', 'Biography', 'Crime'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Drama', 'Romance'],
 ['Drama', 'Mystery', 'Thriller'],
 ['Drama', 'History', 'Thriller'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Comedy'],
 ['Action', 'Thriller'],
 ['Comedy', 'Music'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Crime', 'Drama', 'Thriller'],
 ['Action', 'Adventure', 'Crime'],
 ['Comedy', 'Drama', 'Horror'],
 ['Drama'],
 ['Drama', 'Mystery', 'Romance'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Drama'],
 ['Action', 'Drama', 'Thriller'],
 ['Drama'],
 ['Action', 'Horror', 'Romance'],
 ['Action', 'Drama', 'Fantasy'],
 ['Action', 'Crime', 'Drama'],
 ['Drama', 'Fantasy', 'Romance'],
 ['Action', 'Crime', 'Thriller'],
 ['Action', 'Mystery', 'Thriller'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Action', 'Horror', 'Sci-Fi'],
 ['Comedy', 'Drama'],
 ['Comedy'],
 ['Action', 'Adventure', 'Horror'],
 ['Action', 'Adventure', 'Thriller'],
 ['Action', 'Crime', 'Drama'],
 ['Comedy', 'Crime', 'Drama'],
 ['Drama', 'Romance'],
 ['Drama', 'Thriller'],
 ['Action', 'Comedy', 'Crime'],
 ['Comedy'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Drama', 'Romance'],
 ['Animation', 'Family', 'Fantasy'],
 ['Drama', 'Romance'],
 ['Thriller'],
 ['Adventure', 'Horror', 'Mystery'],
 ['Action', 'Sci-Fi'],
 ['Adventure', 'Comedy', 'Drama'],
 ['Animation', 'Action', 'Adventure'],
 ['Drama', 'Horror'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Comedy', 'Drama'],
 ['Action', 'Horror', 'Mystery'],
 ['Action', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Comedy', 'Crime'],
 ['Comedy', 'Romance'],
 ['Drama', 'Romance'],
 ['Crime', 'Drama', 'Thriller'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Biography', 'Drama'],
 ['Drama', 'Mystery', 'Sci-Fi'],
 ['Adventure', 'Comedy', 'Family'],
 ['Action', 'Adventure', 'Crime'],
 ['Action', 'Crime', 'Mystery'],
 ['Mystery', 'Thriller'],
 ['Action', 'Sci-Fi', 'Thriller'],
 ['Action', 'Comedy', 'Crime'],
 ['Biography', 'Crime', 'Drama'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Biography', 'Drama', 'History'],
 ['Biography', 'Comedy', 'Drama'],
 ['Drama', 'Thriller'],
 ['Horror', 'Thriller'],
 ['Drama'],
 ['Drama', 'War'],
 ['Comedy', 'Drama', 'Romance'],
 ['Drama', 'Romance', 'Sci-Fi'],
 ['Action', 'Crime', 'Drama'],
 ['Comedy', 'Drama'],
 ['Animation', 'Action', 'Adventure'],
 ['Adventure', 'Comedy', 'Drama'],
 ['Comedy', 'Drama', 'Family'],
 ['Drama', 'Romance', 'Thriller'],
 ['Comedy', 'Crime', 'Drama'],
 ['Animation', 'Comedy', 'Family'],
 ['Drama', 'Horror', 'Sci-Fi'],
 ['Action', 'Adventure', 'Drama'],
 ['Action', 'Horror', 'Sci-Fi'],
 ['Action', 'Crime', 'Sport'],
 ['Drama', 'Horror', 'Sci-Fi'],
 ['Drama', 'Horror', 'Sci-Fi'],
 ['Action', 'Adventure', 'Comedy'],
 ['Mystery', 'Sci-Fi', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Sci-Fi', 'Thriller'],
 ['Drama', 'Romance'],
 ['Crime', 'Drama', 'Thriller'],
 ['Comedy', 'Drama', 'Music'],
 ['Drama', 'Fantasy', 'Romance'],
 ['Crime', 'Drama', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Comedy', 'Drama', 'Romance'],
 ['Comedy', 'Romance'],
 ['Drama', 'Sci-Fi', 'Thriller'],
 ['Drama', 'War'],
 ['Action', 'Crime', 'Drama'],
 ['Sci-Fi', 'Thriller'],
 ['Adventure', 'Drama', 'Horror'],
 ['Comedy', 'Drama', 'Music'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Adventure', 'Drama'],
 ['Action', 'Crime', 'Drama'],
 ['Adventure', 'Fantasy'],
 ['Drama', 'Romance'],
 ['Biography', 'History', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Action', 'Drama', 'History'],
 ['Biography', 'Comedy', 'Drama'],
 ['Crime', 'Drama', 'Thriller'],
 ['Action', 'Biography', 'Drama'],
 ['Action', 'Drama', 'Sci-Fi'],
 ['Adventure', 'Horror'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Mystery'],
 ['Comedy', 'Drama', 'Romance'],
 ['Horror', 'Thriller'],
 ['Action', 'Sci-Fi', 'Thriller'],
 ['Action', 'Sci-Fi', 'Thriller'],
 ['Biography', 'Drama'],
 ['Action', 'Crime', 'Drama'],
 ['Action', 'Crime', 'Mystery'],
 ['Action', 'Adventure', 'Comedy'],
 ['Crime', 'Drama', 'Thriller'],
 ['Crime', 'Drama'],
 ['Mystery', 'Thriller'],
 ['Mystery', 'Sci-Fi', 'Thriller'],
 ['Action', 'Mystery', 'Sci-Fi'],
 ['Drama', 'Romance'],
 ['Drama', 'Thriller'],
 ['Drama', 'Mystery', 'Sci-Fi'],
 ['Comedy', 'Drama'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Biography', 'Drama', 'Sport'],
 ['Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Biography', 'Drama', 'Romance'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Drama', 'Sci-Fi', 'Thriller'],
 ['Drama', 'Romance', 'Thriller'],
 ['Mystery', 'Thriller'],
 ['Mystery', 'Thriller'],
 ['Action', 'Drama', 'Fantasy'],
 ['Action', 'Adventure', 'Biography'],
 ['Adventure', 'Comedy', 'Sci-Fi'],
 ['Action', 'Adventure', 'Thriller'],
 ['Fantasy', 'Horror'],
 ['Horror', 'Mystery'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Adventure', 'Drama'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Comedy', 'Drama'],
 ['Comedy', 'Drama'],
 ['Crime', 'Drama', 'Thriller'],
 ['Comedy', 'Romance'],
 ['Animation', 'Comedy', 'Family'],
 ['Comedy', 'Drama'],
 ['Comedy', 'Drama'],
 ['Biography', 'Drama', 'Sport'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Drama', 'History'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Mystery'],
 ['Crime', 'Drama', 'Mystery'],
 ['Action'],
 ['Action', 'Adventure', 'Family'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Drama', 'Romance'],
 ['Biography', 'Drama', 'Sport'],
 ['Action', 'Fantasy', 'Thriller'],
 ['Biography', 'Drama', 'Sport'],
 ['Action', 'Drama', 'Fantasy'],
 ['Adventure', 'Sci-Fi', 'Thriller'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Drama', 'Mystery', 'Thriller'],
 ['Drama', 'Romance'],
 ['Crime', 'Drama', 'Mystery'],
 ['Comedy', 'Romance', 'Sport'],
 ['Comedy', 'Family'],
 ['Drama', 'Horror', 'Mystery'],
 ['Action', 'Drama', 'Sport'],
 ['Action', 'Adventure', 'Comedy'],
 ['Drama', 'Mystery', 'Sci-Fi'],
 ['Animation', 'Action', 'Comedy'],
 ['Action', 'Crime', 'Drama'],
 ['Action', 'Crime', 'Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Animation', 'Action', 'Adventure'],
 ['Crime', 'Drama'],
 ['Drama'],
 ['Drama'],
 ['Comedy', 'Crime'],
 ['Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Drama', 'Fantasy', 'Romance'],
 ['Comedy', 'Drama'],
 ['Drama', 'Fantasy', 'Thriller'],
 ['Biography', 'Crime', 'Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Crime', 'Drama'],
 ['Sci-Fi'],
 ['Action', 'Biography', 'Drama'],
 ['Action', 'Comedy', 'Romance'],
 ['Adventure', 'Comedy', 'Drama'],
 ['Comedy', 'Crime', 'Drama'],
 ['Action', 'Fantasy', 'Horror'],
 ['Drama', 'Horror'],
 ['Horror'],
 ['Action', 'Thriller'],
 ['Action', 'Adventure', 'Mystery'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Comedy', 'Drama', 'Romance'],
 ['Crime', 'Drama', 'Mystery'],
 ['Adventure', 'Comedy', 'Family'],
 ['Comedy', 'Drama', 'Romance'],
 ['Comedy'],
 ['Comedy', 'Drama', 'Horror'],
 ['Drama', 'Horror', 'Thriller'],
 ['Animation', 'Adventure', 'Family'],
 ['Comedy', 'Romance'],
 ['Mystery', 'Romance', 'Sci-Fi'],
 ['Crime', 'Drama'],
 ['Drama', 'Horror', 'Mystery'],
 ['Comedy'],
 ['Biography', 'Drama'],
 ['Comedy', 'Drama', 'Thriller'],
 ['Comedy', 'Western'],
 ['Drama', 'History', 'War'],
 ['Drama', 'Horror', 'Sci-Fi'],
 ['Drama'],
 ['Comedy', 'Drama'],
 ['Fantasy', 'Horror', 'Thriller'],
 ['Drama', 'Romance'],
 ['Action', 'Comedy', 'Fantasy'],
 ['Drama', 'Horror', 'Musical'],
 ['Crime', 'Drama', 'Mystery'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Comedy', 'Music'],
 ['Drama'],
 ['Biography', 'Crime', 'Drama'],
 ['Drama'],
 ['Action', 'Adventure', 'Comedy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Drama'],
 ['Action', 'Comedy', 'Crime'],
 ['Comedy', 'Drama', 'Romance'],
 ['Crime', 'Drama', 'Mystery'],
 ['Action', 'Comedy', 'Crime'],
 ['Drama'],
 ['Drama', 'Romance'],
 ['Crime', 'Drama', 'Mystery'],
 ['Adventure', 'Comedy', 'Romance'],
 ['Comedy', 'Crime', 'Drama'],
 ['Adventure', 'Drama', 'Thriller'],
 ['Biography', 'Crime', 'Drama'],
 ['Crime', 'Drama', 'Thriller'],
 ['Drama', 'History', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Comedy'],
 ['Horror'],
 ['Action', 'Crime', 'Mystery'],
 ['Comedy', 'Romance'],
 ['Comedy'],
 ['Action', 'Drama', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Drama', 'Mystery', 'Thriller'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Fantasy', 'Horror'],
 ['Drama', 'Romance'],
 ['Biography', 'Drama'],
 ['Biography', 'Drama'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Drama', 'Mystery', 'Thriller'],
 ['Action', 'Horror', 'Sci-Fi'],
 ['Drama', 'Romance'],
 ['Biography', 'Drama'],
 ['Action', 'Adventure', 'Drama'],
 ['Adventure', 'Drama', 'Fantasy'],
 ['Drama', 'Family'],
 ['Comedy', 'Drama', 'Romance'],
 ['Drama', 'Romance', 'Sci-Fi'],
 ['Action', 'Adventure', 'Thriller'],
 ['Comedy', 'Romance'],
 ['Crime', 'Drama', 'Horror'],
 ['Comedy', 'Fantasy'],
 ['Action', 'Comedy', 'Crime'],
 ['Adventure', 'Drama', 'Romance'],
 ['Action', 'Crime', 'Drama'],
 ['Crime', 'Horror', 'Thriller'],
 ['Romance', 'Sci-Fi', 'Thriller'],
 ['Comedy', 'Drama', 'Romance'],
 ['Crime', 'Drama'],
 ['Crime', 'Drama', 'Mystery'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Animation', 'Fantasy'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Drama', 'Mystery', 'War'],
 ['Comedy', 'Romance'],
 ['Animation', 'Comedy', 'Family'],
 ['Comedy'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Action', 'Adventure', 'Drama'],
 ['Comedy'],
 ['Drama'],
 ['Adventure', 'Biography', 'Drama'],
 ['Comedy'],
 ['Horror', 'Thriller'],
 ['Action', 'Drama', 'Family'],
 ['Comedy', 'Fantasy', 'Horror'],
 ['Comedy', 'Romance'],
 ['Drama', 'Mystery', 'Romance'],
 ['Action', 'Adventure', 'Comedy'],
 ['Thriller'],
 ['Comedy'],
 ['Adventure', 'Comedy', 'Sci-Fi'],
 ['Comedy', 'Drama', 'Fantasy'],
 ['Mystery', 'Thriller'],
 ['Comedy', 'Drama'],
 ['Adventure', 'Drama', 'Family'],
 ['Horror', 'Thriller'],
 ['Action', 'Drama', 'Romance'],
 ['Drama', 'Romance'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Comedy'],
 ['Action', 'Biography', 'Drama'],
 ['Drama', 'Mystery', 'Romance'],
 ['Adventure', 'Drama', 'Western'],
 ['Drama', 'Music', 'Romance'],
 ['Comedy', 'Romance', 'Western'],
 ['Thriller'],
 ['Comedy', 'Drama', 'Romance'],
 ['Horror', 'Thriller'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Horror', 'Mystery'],
 ['Comedy', 'Crime', 'Drama'],
 ['Action', 'Comedy', 'Romance'],
 ['Biography', 'Drama', 'History'],
 ['Adventure', 'Drama'],
 ['Drama', 'Thriller'],
 ['Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Biography', 'Drama'],
 ['Drama', 'Music'],
 ['Comedy', 'Drama'],
 ['Drama', 'Thriller', 'War'],
 ['Action', 'Mystery', 'Thriller'],
 ['Horror', 'Sci-Fi', 'Thriller'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Sci-Fi'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Drama', 'Mystery', 'Romance'],
 ['Drama'],
 ['Action', 'Adventure', 'Thriller'],
 ['Action', 'Crime', 'Thriller'],
 ['Animation', 'Action', 'Adventure'],
 ['Drama', 'Fantasy', 'Mystery'],
 ['Drama', 'Sci-Fi'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Horror', 'Thriller'],
 ['Action', 'Thriller'],
 ['Comedy'],
 ['Biography', 'Drama'],
 ['Action', 'Mystery', 'Thriller'],
 ['Action', 'Mystery', 'Sci-Fi'],
 ['Crime', 'Drama', 'Thriller'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Drama', 'Romance'],
 ['Biography', 'Drama', 'Thriller'],
 ['Drama'],
 ['Action', 'Adventure', 'Family'],
 ['Animation', 'Comedy', 'Family'],
 ['Action', 'Crime', 'Drama'],
 ['Comedy'],
 ['Comedy', 'Crime', 'Thriller'],
 ['Comedy', 'Romance'],
 ['Animation', 'Comedy', 'Drama'],
 ['Action', 'Crime', 'Thriller'],
 ['Comedy', 'Romance'],
 ['Adventure', 'Biography', 'Drama'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Action', 'Comedy', 'Sci-Fi'],
 ['Comedy', 'Fantasy', 'Horror'],
 ['Comedy', 'Crime'],
 ['Animation', 'Action', 'Adventure'],
 ['Action', 'Drama', 'Thriller'],
 ['Fantasy', 'Horror'],
 ['Crime', 'Drama', 'Thriller'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Comedy', 'Drama', 'Romance'],
 ['Biography', 'Drama', 'Romance'],
 ['Action', 'Drama', 'History'],
 ['Action', 'Adventure', 'Comedy'],
 ['Horror', 'Thriller'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Comedy', 'Romance'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Crime', 'Drama', 'Mystery'],
 ['Adventure', 'Biography', 'Drama'],
 ['Horror', 'Mystery', 'Thriller'],
 ['Horror', 'Thriller'],
 ['Drama', 'Romance', 'War'],
 ['Adventure', 'Fantasy', 'Mystery'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Biography', 'Drama'],
 ['Drama', 'Thriller'],
 ['Horror', 'Thriller'],
 ['Drama', 'Horror', 'Thriller'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Horror', 'Thriller'],
 ['Comedy'],
 ['Drama', 'Sport'],
 ['Comedy', 'Family'],
 ['Drama', 'Romance'],
 ['Action', 'Adventure', 'Comedy'],
 ['Comedy'],
 ['Mystery', 'Romance', 'Thriller'],
 ['Crime', 'Drama'],
 ['Action', 'Comedy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Biography', 'Drama', 'Romance'],
 ['Comedy', 'Crime'],
 ['Drama', 'Thriller'],
 ['Drama'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Thriller'],
 ['Drama', 'Thriller'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Thriller'],
 ['Biography', 'Drama', 'Sport'],
 ['Crime', 'Drama', 'Thriller'],
 ['Drama', 'Music'],
 ['Crime', 'Drama', 'Thriller'],
 ['Drama', 'Romance'],
 ['Animation', 'Action', 'Adventure'],
 ['Comedy', 'Drama'],
 ['Action', 'Adventure', 'Drama'],
 ['Biography', 'Crime', 'Drama'],
 ['Horror'],
 ['Biography', 'Drama', 'Mystery'],
 ['Drama', 'Romance'],
 ['Animation', 'Drama', 'Romance'],
 ['Comedy', 'Family'],
 ['Drama'],
 ['Mystery', 'Thriller'],
 ['Drama', 'Fantasy', 'Horror'],
 ['Drama', 'Romance'],
 ['Biography', 'Drama', 'History'],
 ['Comedy', 'Family'],
 ['Action', 'Adventure', 'Thriller'],
 ['Comedy', 'Drama'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Thriller'],
 ['Drama', 'Romance'],
 ['Comedy', 'Drama', 'Romance'],
 ['Drama', 'Horror', 'Sci-Fi'],
 ['Comedy', 'Horror', 'Romance'],
 ['Drama'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Adventure', 'Drama'],
 ['Biography', 'Comedy', 'Drama'],
 ['Drama', 'Mystery', 'Romance'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Drama', 'Romance', 'Sci-Fi'],
 ['Drama'],
 ['Drama', 'Fantasy'],
 ['Drama', 'Romance'],
 ['Comedy', 'Horror', 'Thriller'],
 ['Comedy', 'Drama', 'Romance'],
 ['Crime', 'Drama'],
 ['Comedy', 'Romance'],
 ['Action', 'Drama', 'Family'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Thriller', 'War'],
 ['Action', 'Comedy', 'Horror'],
 ['Biography', 'Drama', 'Sport'],
 ['Adventure', 'Comedy', 'Drama'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Adventure', 'Crime'],
 ['Comedy', 'Romance'],
 ['Animation', 'Action', 'Adventure'],
 ['Action', 'Crime', 'Sci-Fi'],
 ['Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Crime', 'Thriller'],
 ['Comedy', 'Horror', 'Sci-Fi'],
 ['Drama', 'Thriller'],
 ['Drama', 'Fantasy', 'Horror'],
 ['Thriller'],
 ['Adventure', 'Drama', 'Family'],
 ['Mystery', 'Sci-Fi', 'Thriller'],
 ['Biography', 'Crime', 'Drama'],
 ['Drama', 'Fantasy', 'Horror'],
 ['Action', 'Adventure', 'Thriller'],
 ['Crime', 'Drama', 'Horror'],
 ['Crime', 'Drama', 'Fantasy'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Action', 'Adventure', 'Drama'],
 ['Action', 'Comedy', 'Horror'],
 ['Comedy', 'Drama', 'Family'],
 ['Action', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Drama', 'Fantasy'],
 ['Drama'],
 ['Drama'],
 ['Comedy'],
 ['Drama'],
 ['Comedy', 'Drama', 'Music'],
 ['Drama', 'Fantasy', 'Music'],
 ['Drama'],
 ['Thriller'],
 ['Comedy', 'Horror'],
 ['Action', 'Comedy', 'Sport'],
 ['Horror'],
 ['Comedy', 'Drama'],
 ['Action', 'Drama', 'Thriller'],
 ['Drama', 'Romance'],
 ['Horror', 'Mystery'],
 ['Adventure', 'Drama', 'Fantasy'],
 ['Thriller'],
 ['Comedy', 'Romance'],
 ['Action', 'Sci-Fi', 'Thriller'],
 ['Fantasy', 'Mystery', 'Thriller'],
 ['Biography', 'Drama'],
 ['Crime', 'Drama'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure'],
 ['Comedy', 'Drama'],
 ['Comedy', 'Drama'],
 ['Comedy', 'Drama', 'Romance'],
 ['Adventure', 'Comedy', 'Drama'],
 ['Action', 'Sci-Fi', 'Thriller'],
 ['Comedy', 'Romance'],
 ['Action', 'Fantasy', 'Horror'],
 ['Crime', 'Drama', 'Thriller'],
 ['Action', 'Drama', 'Thriller'],
 ['Crime', 'Drama', 'Mystery'],
 ['Crime', 'Drama', 'Mystery'],
 ['Drama', 'Sci-Fi', 'Thriller'],
 ['Biography', 'Drama', 'History'],
 ['Crime', 'Horror', 'Thriller'],
 ['Drama'],
 ['Drama', 'Mystery', 'Thriller'],
 ['Adventure', 'Biography'],
 ['Adventure', 'Biography', 'Crime'],
 ['Action', 'Horror', 'Thriller'],
 ['Action', 'Adventure', 'Western'],
 ['Horror', 'Thriller'],
 ['Drama', 'Mystery', 'Thriller'],
 ['Comedy', 'Drama', 'Musical'],
 ['Horror', 'Mystery'],
 ['Biography', 'Drama', 'Sport'],
 ['Comedy', 'Family', 'Romance'],
 ['Drama', 'Mystery', 'Thriller'],
 ['Comedy'],
 ['Drama'],
 ['Drama', 'Thriller'],
 ['Biography', 'Drama', 'Family'],
 ['Comedy', 'Drama', 'Family'],
 ['Drama', 'Fantasy', 'Musical'],
 ['Comedy'],
 ['Adventure', 'Family'],
 ['Adventure', 'Comedy', 'Fantasy'],
 ['Horror', 'Thriller'],
 ['Drama', 'Romance'],
 ['Horror'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Drama', 'Family', 'Music'],
 ['Comedy', 'Drama', 'Romance'],
 ['Action', 'Adventure', 'Horror'],
 ['Comedy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Horror'],
 ['Drama', 'Music', 'Romance'],
 ['Adventure', 'Comedy'],
 ['Comedy', 'Family', 'Fantasy']]
# Flatten the per-movie genre lists and keep each distinct genre label once.
genres = np.unique([label for labels in movie_genre for label in labels])
genres
array(['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
       'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Sport', 'Thriller',
       'War', 'Western'], dtype='<U9')
# How many movies there are in each genre.
# Start from an all-zero table with one row per movie and one column per
# genre; the shape is derived from the data instead of being hard-coded.
# NOTE: the class is spelled `pd.DataFrame` (capital F); `pd.Dataframe`
# raises AttributeError.
count = pd.DataFrame(
    np.zeros(shape=[len(movie_genre), len(genres)], dtype="int32"),
    columns=genres,
)
count.head()
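   Action  Adventure  Animation  Biography  Comedy  Crime  Drama  Family  Fantasy  History  Horror  Music  Musical  Mystery  Romance  Sci-Fi  Sport  Thriller  War  Western
0       0          0          0          0       0      0      0       0        0        0       0      0        0        0        0       0      0         0    0        0
1       0          0          0          0       0      0      0       0        0        0       0      0        0        0        0       0      0         0    0        0
2       0          0          0          0       0      0      0       0        0        0       0      0        0        0        0       0      0         0    0        0
3       0          0          0          0       0      0      0       0        0        0       0      0        0        0        0       0      0         0    0        0
4       0          0          0          0       0      0      0       0        0        0       0      0        0        0        0       0      0         0    0        0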
# Fill in the table: for every movie row, set the columns named by that
# movie's genre labels to 1.
for row_idx in range(1000):
    row_genres = movie_genre[row_idx]
    count.loc[row_idx, row_genres] = 1
count
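     Action  Adventure  Animation  Biography  Comedy  Crime  Drama  Family  Fantasy  History  Horror  Music  Musical  Mystery  Romance  Sci-Fi  Sport  Thriller  War  Western
0         1          1          0          0       0      0      0       0        0        0       0      0        0        0        0       1      0         0    0        0
1         0          1          0          0       0      0      0       0        0        0       0      0        0        1        0       1      0         0    0        0
2         0          0          0          0       0      0      0       0        0        0       1      0        0        0        0       0      0         1    0        0
3         0          0          1          0       1      0      0       1        0        0       0      0        0        0        0       0      0         0    0        0
4         1          1          0          0       0      0      0       0        1        0       0      0        0        0        0       0      0         0    0        0
...     ...        ...        ...        ...     ...    ...    ...     ...      ...      ...     ...    ...      ...      ...      ...     ...    ...       ...  ...      ...
995       0          0          0          0       0      1      1       0        0        0       0      0        0        1        0       0      0         0    0        0
996       0          0          0          0       0      0      0       0        0        0       1      0        0        0        0       0      0         0    0        0
997       0          0          0          0       0      0      1       0        0        0       0      1        0        0        1       0      0         0    0        0
998       0          1          0          0       1      0      0       0        0        0       0      0        0        0        0       0      0         0    0        0
999       0          0          0          0       1      0      0       1        1        0       0      0        0        0        0       0      0         0    0        0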

1000 rows × 20 columns

# Pie chart of the per-genre column totals, largest first.
count.sum(axis=0).sort_values(ascending=False).plot.pie(
    figsize=(10, 10),
    fontsize=20,
)
 



# Bar chart of the per-genre column totals, largest first.
count.sum(axis=0).sort_values(ascending=False).plot.bar(
    figsize=(20, 9),
    fontsize=40,
    colormap="cool",
)
 



欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/5721380.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-12-18
下一篇 2022-12-17

发表评论

登录后才能评论

评论列表(0条)

保存