微博热搜数据变化趋势视频化展示_随笔

微博热搜数据变化趋势视频化展示
前一篇，说了如何采集微博热搜数据并存储，这篇就讲讲如何将采集到的热搜数据视频化。新年新气象，更新一波。PS：这号鸽到都年更了(｀・ω・´)

基本想法是：根据热度数值确定每个热搜词条对应椭圆的大小和颜色，把同一时刻的所有热搜词条画在一张图里，再把不同时刻的图片按时间顺序合成视频，这样就可以直观地看到热搜的变化趋势，最后配上 BGM 就完成了。

Talk is cheap, show me the code.
# -*- coding: utf-8 -*-

import pandas as pd
import random
from tqdm import tqdm
import datetime
import time
import math
import numpy as np

import pygame     #采用pygame绘制图片
import sys
from pygame.locals import *
import cv2
from PIL import ImageGrab     #截屏获取图片形成视频
import moviepy.editor as mpe

import My_Email as my

class WeiboHotnessVideo:
    
    def __init__(self, date):
        '''
        初始化对象
    
        Parameters
        ----------
        date : str
            日期，如20200202.
    
        Returns
        -------
        None.
    
        '''
        self.folder_path = r'XXXXXXXXXXXXweibo_hotness'
        self.date = date       #当前天日期
        self.next_date = datetime.date(int(date[:4]), int(date[4:6]), int(date[-2:])) + datetime.timedelta(days=1)    #下一天日期
        self.next_date = datetime.datetime.strftime(self.next_date, '%Y%m%d')
        self.starttime = '0430'    #每天的开始和结束时间
        self.frame_number = 10     #每列数据间帧数
        self.fps = 60           #pygame帧率
        self.video_fps = 60      #生成的视频帧率
        self.background_color = (255, 255, 255)    #视频背景颜色
        self.hotness_video()    #数据视频化
        self.add_BGM()          #配音

    def hotness_video(self):
        '''
        Render every interpolated frame with pygame and record it to an AVI file.

        The frame table comes from hotness_data_settle(). Afterwards
        self.folder_path is switched to the video asset/output prefix, a short
        title sequence is drawn, and then one picture is drawn per frame
        column: every title with a positive hotness becomes an ellipse whose
        size and colour depend on its hotness. A title keeps the position it
        had in the previous frame; new titles get a random position. Each
        picture is captured with a full-screen grab and appended to
        VIDEO<date>.avi.

        Side effects: overwrites self.folder_path and sets self.screen_width,
        self.screen_height and self.screen_ratio.

        Returns
        -------
        None.

        '''
        all_frame_data = self.hotness_data_settle()
        # From here on folder_path points at the video assets/output. It is
        # used as a raw prefix (no separator is inserted before file names).
        self.folder_path = r'D:weibo_hotness_video'

        # The recording is a full-screen grab, so the screen size determines
        # both the pygame window size and the video frame size.
        self.screen_width, self.screen_height = ImageGrab.grab().size
        self.screen_ratio = self.screen_height/self.screen_width
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        video = cv2.VideoWriter(self.folder_path + 'VIDEO%s.avi'%self.date, fourcc, self.video_fps,
                                (self.screen_width, self.screen_height))
        try:
            pygame.init()
            FPSClock = pygame.time.Clock()
            screen = pygame.display.set_mode((self.screen_width, self.screen_height))

            self._draw_opening(screen, video, FPSClock)

            print('开始循环跌打绘制每帧数据！！！')
            font_path = self.folder_path + r'STXINGKA.TTF'
            fonts = {}      # font size -> Font, so each size is loaded from disk only once
            stamp_font = pygame.font.Font(self.folder_path + 'STXINGKA.TTF', 40)
            previous_frame_data = None

            for frame in tqdm(all_frame_data.columns):
                for event in pygame.event.get():
                    if event.type == QUIT:
                        # The finally clause releases the writer and pygame.
                        sys.exit()

                screen.fill(self.background_color)
                # Ascending order: hotter titles are drawn later, i.e. on top.
                current_frame_data = all_frame_data[[frame]].sort_values(by = frame, ascending=True)
                # Column labels are YYYYmmddHHMMSS; drop the seconds for display.
                frame_time = frame[:-2]
                frame_time = frame_time[:4]+'/'+frame_time[4:6]+'/'+frame_time[6:8]+' ' + frame_time[8:10]+':'+frame_time[-2:]
                # Positions and sizes of this frame; becomes previous_frame_data.
                temp_index = current_frame_data[current_frame_data[frame] > 0].index
                temp_previous_frame_data = pd.DataFrame(index = temp_index, columns = ['pos_x','pos_y', 'size_x', 'size_y'])
                # Default so the time stamp can be drawn even if no title is visible.
                text_color = (0, 0, 0)

                for text, hotness_number in current_frame_data[frame].items():
                    # Same criterion as temp_index: only positive hotness is drawn.
                    if not hotness_number > 0:
                        continue

                    color, text_color, size_x = self._ellipse_style(hotness_number)
                    size_y = round(size_x*self.screen_ratio, 2)

                    if previous_frame_data is not None and text in previous_frame_data.index:
                        # Already on screen in the previous frame: keep its position.
                        pos_x = previous_frame_data.at[text, 'pos_x']
                        pos_y = previous_frame_data.at[text, 'pos_y']
                    else:
                        # New title: random position, checked against the
                        # previous frame (or, for the very first frame,
                        # against what has been placed so far).
                        occupied = temp_previous_frame_data if previous_frame_data is None else previous_frame_data
                        pos_x, pos_y = self.calculate_localtion(occupied, 0.8, size_x, size_y)
                    temp_previous_frame_data.loc[text] = [pos_x, pos_y, size_x, size_y]

                    # Filled ellipse plus a 3 px black outline.
                    bounds = [pos_x-0.5*size_x, pos_y-0.5*size_y, size_x, size_y]
                    pygame.draw.ellipse(screen, color, bounds)
                    pygame.draw.ellipse(screen, (0,0,0), bounds, 3)

                    text_top, text_center, text_bottom = self._split_title(text)
                    text_size = self._title_font_size(text, text_center, text_top is not None, size_x)
                    my_font = fonts.get(text_size)
                    if my_font is None:
                        my_font = fonts[text_size] = pygame.font.Font(font_path, text_size)
                    # Vertical offsets are in units of the rendered line height.
                    for line, offset in ((text_top, -1.3), (text_bottom, 0.3), (text_center, -0.5)):
                        if line is None:
                            continue
                        textImage = my_font.render(line, True, text_color)
                        text_size_x, text_size_y = textImage.get_size()
                        screen.blit(textImage, (pos_x - 0.5*text_size_x, pos_y + offset*text_size_y))

                # Time stamp in the lower-left corner. It reuses the text colour
                # of the last (hottest) ellipse drawn and gets the opposite
                # colour as its background box.
                if text_color == (0,0,0):
                    bcolor = (255,255,255)
                else:
                    bcolor = (0,0,0)
                textImage = stamp_font.render(frame_time, True, text_color, bcolor)
                screen.blit(textImage, (4, self.screen_height-44))

                previous_frame_data = temp_previous_frame_data
                pygame.display.update()
                self._record_frame(video)
                FPSClock.tick(self.fps)
        finally:
            # Always finalise the video file and shut pygame down, including on
            # errors and when the window is closed.
            video.release()
            pygame.quit()

    def _draw_opening(self, screen, video, FPSClock):
        '''Draw and record the 1.5 s title sequence; the title is revealed left to right during the first second.'''
        movie_start_text = '%s年%s月%s日微博热搜动态'%(self.date[:4], self.date[4:6], self.date[-2:])
        background = pygame.image.load(self.folder_path + 'movie_start.jpg')
        my_font = pygame.font.Font(self.folder_path + r'STXINGKA.TTF',120)
        textImage = my_font.render(movie_start_text, True, (255,153,0))
        text_size_x, text_size_y = textImage.get_size()
        # Centre the title on the screen.
        pos_x = 0.5*(self.screen_width-text_size_x)
        pos_y = 0.5*(self.screen_height-text_size_y)

        for i in range(0, int(1.5*self.video_fps)):
            screen.blit(background,(0,0))
            # Only the left part of the title is visible until it is fully revealed.
            if i < self.video_fps:
                shown_width = text_size_x*i/self.video_fps
            else:
                shown_width = text_size_x
            visible_area = pygame.Rect(0,0, shown_width, text_size_y)
            screen.blit(textImage, (pos_x, pos_y), visible_area)
            pygame.display.update()
            self._record_frame(video)
            FPSClock.tick(self.fps)

    @staticmethod
    def _record_frame(video):
        '''Grab the whole screen and append it to the video writer as a BGR frame.'''
        im = ImageGrab.grab()
        video.write(cv2.cvtColor(np.array(im), cv2.COLOR_RGB2BGR))

    def _ellipse_style(self, hotness_number):
        '''Map a hotness value to (fill colour, text colour, ellipse width in pixels).'''
        text_color = (0, 0, 0)
        if hotness_number < 1E4:
            color = (0, 128, 0)
            size_x = 40
        elif hotness_number < 1.5E7:
            # Width grows with the square root of the hotness.
            size_x = round(math.sqrt(hotness_number/25000000)*self.screen_width, 2)
            if hotness_number < 2E5:
                R_color = int(round(((hotness_number-1E4)/1.9E5)*128))
                color = (R_color, 128, 0)
            elif hotness_number < 1E6:
                R_color = 127 + int(round(((hotness_number-2E5)/8E5)*128))
                color = (R_color, 128, 0)
            elif hotness_number < 5E6:
                G_color = 128 - int(round(((hotness_number-1E6)/4E6)*128))
                color = (255, G_color, 0)
            else:
                R_color = 255 - int(round(((hotness_number-5E6)/1E7)*128))
                color = (R_color, 0, 0)
        elif hotness_number < 2E8:
            # Dark fill from here on, so the label switches to white.
            R_color = 128 - int(round(((hotness_number-1.5E7)/1.85E8)*128))
            color = (R_color, 0, 0)
            text_color = (255,255,255)
            size_x = round((0.775+math.sqrt(hotness_number/2E8))*self.screen_width, 2)
        else:
            color = (0, 0, 0)
            text_color = (255,255,255)
            size_x = round(2*self.screen_width, 2)
        return color, text_color, size_x

    @staticmethod
    def _split_title(text):
        '''Split a title into (top, center, bottom) lines; titles shorter than 7 characters stay on one line.'''
        if len(text) < 7:
            return None, text, None
        split = int(math.ceil(len(text)*3/7))    # characters on the middle line
        first = (len(text)-split)//2             # characters on the top line
        return text[:first], text[first:first+split], text[first+split:]

    @staticmethod
    def _title_font_size(text, text_center, is_split, size_x):
        '''Choose a font size so the (middle) line roughly spans the ellipse width.'''
        if not is_split:
            text_size = int(round(size_x/len(text)))
        elif len(text) < 9:
            text_size = int(round(0.6*size_x/len(text_center)))
        elif len(text) < 11:
            text_size = int(round(0.7*size_x/len(text_center)))
        else:
            text_size = int(round(size_x/len(text_center)))
        # Keep the size a positive integer for very small ellipses.
        return max(1, text_size)

    def hotness_data_settle(self):
        '''
        根据输入日期将数据划分, 插值为对应的帧数，输出插值后的整合数据

        Returns
        -------
        Dataframe, 整理合并后的热度数据.

        '''
        try:
            data_current = pd.read_csv(self.folder_path + r'data%s.csv'%self.date, index_col=0)     #前一天热度数据
            data_next = pd.read_csv(self.folder_path + 'data%s.csv'%self.next_date, index_col=0)   #后一天热度数据
        except Exception as e:
            print(e)
            sys.exit()
        #截取需要的时间段热度数据
        data_current = data_current.loc[data_current.index >= int(self.starttime)]
        data_next = data_next.loc[data_next.index <= int(self.starttime)]
        #热度数据时间标准化
        data_current.index = [self.date+'0'*(4-len(str(x))) + str(x) for x in data_current.index]
        data_next.index = [self.next_date+'0'*(4-len(str(x))) + str(x) for x in data_next.index]
        all_data = data_current.append(data_next)     #合并热度数据
        all_frame_data = pd.Dataframe()
        print('开始进行数据整理合并！！！')
        for i in tqdm(range(1, len(all_data))):
            data_early = list(all_data.iloc[i-1])     #前一帧数据
            data_latter = list(all_data.iloc[i])      #后一帧数据
            time_early = str(all_data.index[i-1])           #前一帧时间
            time_latter = str(all_data.index[i])         #后一帧时间
            time_early = datetime.datetime.strptime(time_early, '%Y%m%d%H%M')
            time_latter = datetime.datetime.strptime(time_latter, '%Y%m%d%H%M')
            time_difference = time_latter - time_early       #时间差

            time_list = [time_early+datetime.timedelta(seconds=30*x) for x in range(0, int(time_difference.total_seconds()/30))]
            time_list = [datetime.datetime.strftime(x, '%Y%m%d%H%M%S') for x in time_list]
            data_frame = pd.Dataframe(columns=time_list)
            for j in range(0, 50):
                text_early = data_early[2*j]
                hotness_early = data_early[2*j+1]
                
                if text_early in data_latter:    #前后都有一样的text的话插值,每5min10帧
                    hotness_latter = data_latter[data_latter.index(text_early)+1]
                    frame_increment = (hotness_latter - hotness_early)/(len(time_list))     #每帧增加的热度                    
                    data_frame.loc[text_early] = [hotness_early + x*frame_increment for x in range(0,len(time_list))]
                else:    #后面没有则10,8,6,4,2,0插值
                    frame_increment = 2*hotness_latter/(len(time_list))
                    middle_point = int(0.5*len(time_list))+1        #分割中间点
                    list_head = [hotness_early + x*frame_increment for x in range(0, middle_point)]                 
                    data_frame.loc[text_early] = list_head + [0 for x in range(1, middle_point-1)]
            for j in range(0, 50):
                text_latter = data_latter[2*j]
                if text_latter in data_frame.index:
                    pass
                else:    #前面没有的话则后面插值
                    hotness_latter = data_latter[2*j+1]
                    frame_increment = 2*hotness_latter/(len(time_list))
                    middle_point = int(0.5*len(time_list))+1        #分割中间点
                    list_tail = [x*frame_increment for x in range(1, middle_point-1)]
                    data_frame.loc[text_latter] = [0 for x in range(0, middle_point)] + list_tail
            
            all_frame_data = pd.concat([all_frame_data, data_frame], axis = 1, join='outer', sort=False)

        all_frame_data.fillna(0, inplace=True)
        return(all_frame_data)
                  
    def calculate_localtion(self, data, space_level, size_x, size_y):
        '''
        根据给定的已存在图元的x/y坐标数据计算新图元的坐标数据

        Parameters
        ----------
        data: Dataframe
            已存在图元的x/y坐标数据.
        space_level : Dataframe
            图元间间距等级.
        size_x : Float
            当前图元x坐标.
        size_y : Float
            当前图元y坐标.

        Returns
        -------
        pos_x : float
            新图元的x坐标.
        pos_y : float
            新图元的y坐标.

        '''          
        for i in range(0,10):
            pos_x = round(random.uniform(self.screen_width*0.1, self.screen_width*0.9), 2)
            pos_y = round(random.uniform(self.screen_height*0.1, self.screen_height*0.9), 2)
            flag = 0    #flag
            for index in data.index:
                previous_pos_x = data.loc[index]['pos_x']
                previous_pos_y = data.loc[index]['pos_y']
                previous_size_x = data.loc[index]['size_x']
                previous_size_y = data.loc[index]['size_y']
                if abs(pos_x-previous_pos_x) < space_level*abs(previous_size_x+size_x) and abs(pos_y-previous_pos_y) < space_level*abs(previous_size_y+size_y):
                    flag = 1     #如果间距过近则flag=1，终止迭代
                    break
            if flag == 0:
                return pos_x, pos_y
        return pos_x, pos_y       #迭代后依旧未找到合适的值则返回随机值
    
    def add_BGM(self):
        '''
        Add the background music to the video written by hotness_video.

        Reads VIDEO<date>.avi and the BGM mp3 from self.folder_path (used as a
        raw prefix), ends the audio at the video's duration and writes the
        result to VIDEO<date>X.mp4. Both clips are closed afterwards, also
        when writing fails, so the underlying readers are not left open.

        Returns
        -------
        None.

        '''
        video_path = self.folder_path + 'VIDEO%s.avi'%self.date
        BGM_path = self.folder_path + 'BGM专题片纪录片常用流行音乐-大气恢弘有气质-公司介绍(Corp_爱给网_aigei_com.mp3'
        export_video_path = self.folder_path + 'VIDEO%sX.mp4'%self.date

        video = mpe.VideoFileClip(video_path)
        try:
            bgm = mpe.AudioFileClip(BGM_path)
            try:
                # End the music together with the video.
                audio_clip = bgm.set_end(video.duration)
                video.set_audio(audio_clip).write_videofile(export_video_path)
            finally:
                bgm.close()
        finally:
            video.close()
        
if __name__ == '__main__':
    # Build the video for the previous calendar day; constructing the object
    # runs the whole pipeline.
    yesterday = (datetime.datetime.today() - datetime.timedelta(days=1)).strftime('%Y%m%d')
    video = WeiboHotnessVideo(yesterday)
这样就完成了所有的工作，生成并保存了微博热搜数据随时间的变化趋势视频。
2021年12月30日微博热搜
好的，本期内容就是这样，感谢大家的关注，再见！！！
欢迎分享，转载请注明来源：内存溢出
原文地址: http://outofmemory.cn/zaji/5689408.html
微博热搜数据变化趋势视频化展示

发表评论

评论列表（0条）