前一篇,说了如何采集微博热搜数据并存储,这篇就讲讲如何将采集到的热搜数据视频化。新年新气象,更新一波。PS:这号鸽到都年更了(`・ω・´)
基本想法是:根据每条热搜的热度数值确定一个椭圆的大小和颜色,把热搜词条写在对应的椭圆里,并将同一时刻的所有椭圆绘制在一张图中;再把不同时间的热搜图片按顺序合成视频,这样就可以直观地看到热搜的变化趋势,最后添加BGM就成了。
Talk is cheap, show me the code
# -*- coding: utf-8 -*-
import pandas as pd
import random
from tqdm import tqdm
import datetime
import time
import math
import os
import numpy as np
import pygame  # pygame renders every frame
import sys
from pygame.locals import *
import cv2
from PIL import ImageGrab  # screen capture supplies the recorded frames
import moviepy.editor as mpe
import My_Email as my


class WeiboHotnessVideo:
    """Turn one day of Weibo hot-search snapshots into an animated video.

    Every topic is drawn as an ellipse whose size and colour follow its
    hotness value.  Snapshots are interpolated to one frame per 30 seconds
    of data, each frame is rendered with pygame, captured from the screen,
    written to an AVI file and finally dubbed with background music.
    """

    # Default folders are the literals of the original script; they look like
    # placeholders, so pass ``data_folder`` / ``video_folder`` to override.
    data_folder = r'XXXXXXXXXXXXweibo_hotness'
    video_folder = r'D:weibo_hotness_video'
    # Upper bound on (title, hotness) column pairs read from one CSV row.
    topics_per_snapshot = 50
    # Number of data seconds represented by one interpolated frame.
    seconds_per_frame = 30

    def __init__(self, date, data_folder=None, video_folder=None):
        '''
        Initialise the object and immediately run the whole pipeline
        (render the video, then add the background music).

        Parameters
        ----------
        date : str
            Day to render, formatted like 20200202.
        data_folder : str, optional
            Folder holding the ``data<date>.csv`` files.
        video_folder : str, optional
            Folder holding the font, intro picture and BGM; the videos are
            written there as well.

        Returns
        -------
        None.

        '''
        if data_folder is not None:
            self.data_folder = data_folder
        if video_folder is not None:
            self.video_folder = video_folder
        self.folder_path = self.data_folder
        self.date = date  # current day
        next_day = (datetime.date(int(date[:4]), int(date[4:6]), int(date[-2:]))
                    + datetime.timedelta(days=1))
        self.next_date = next_day.strftime('%Y%m%d')  # following day
        self.starttime = '0430'  # HHMM at which a "day" starts and ends
        self.frame_number = 10  # frames between two data columns
        self.fps = 60  # pygame frame rate
        self.video_fps = 60  # frame rate of the generated video
        self.background_color = (255, 255, 255)  # video background colour
        self.hotness_video()  # render the data as a video
        self.add_BGM()  # add the soundtrack

    def hotness_video(self):
        '''
        Render every interpolated frame with pygame, grab the screen after
        each update and append the capture to ``VIDEO<date>.avi``.

        Returns
        -------
        None.

        '''
        all_frame_data = self.hotness_data_settle()  # prepare hotness data
        # From here on every file lives in the video folder.
        self.folder_path = self.video_folder

        # The recording covers the whole screen, so the window uses its size.
        self.screen_width, self.screen_height = ImageGrab.grab().size
        self.screen_ratio = self.screen_height / self.screen_width
        fourcc = cv2.VideoWriter_fourcc(*'XVID')  # codec
        video = cv2.VideoWriter(
            os.path.join(self.folder_path, 'VIDEO%s.avi' % self.date),
            fourcc, self.video_fps, (self.screen_width, self.screen_height))

        pygame.init()
        try:
            FPSClock = pygame.time.Clock()
            screen = pygame.display.set_mode(
                (self.screen_width, self.screen_height))
            font_file = os.path.join(self.folder_path, 'STXINGKA.TTF')
            fonts = {}

            def get_font(size):
                # Loading a font file is expensive; keep one object per size.
                if size not in fonts:
                    fonts[size] = pygame.font.Font(font_file, size)
                return fonts[size]

            self._draw_intro(screen, video, FPSClock, get_font)

            placement_columns = ['pos_x', 'pos_y', 'size_x', 'size_y']
            previous_frame_data = None
            print('开始循环跌打绘制每帧数据!!!')
            for frame in tqdm(all_frame_data.columns):
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        sys.exit()  # the finally clause releases everything
                screen.fill(self.background_color)

                # Topics visible in this frame, least hot first so that the
                # hottest ellipses are painted on top.
                hotness_column = all_frame_data[frame]
                visible = hotness_column[hotness_column > 0].sort_values(
                    ascending=True)

                # Column names look like YYYYmmddHHMMSS; show them to the minute.
                frame_time = str(frame)[:-2]
                frame_time = (frame_time[:4] + '/' + frame_time[4:6] + '/'
                              + frame_time[6:8] + ' ' + frame_time[8:10]
                              + ':' + frame_time[-2:])

                # Position and size of every ellipse drawn in this frame.
                placements = pd.DataFrame(index=visible.index,
                                          columns=placement_columns)
                text_color = (0, 0, 0)  # default when the frame has no topic
                for label, hotness_number in visible.items():
                    text = str(label)
                    if not text:
                        continue
                    color, text_color, size_x = self._ellipse_style(
                        hotness_number)
                    size_y = round(size_x * self.screen_ratio, 2)

                    if previous_frame_data is None:
                        # First frame: avoid what this frame already placed.
                        pos_x, pos_y = self.calculate_localtion(
                            placements, 0.8, size_x, size_y)
                    elif label in previous_frame_data.index:
                        # A topic keeps its position while it stays visible.
                        pos_x = previous_frame_data.at[label, 'pos_x']
                        pos_y = previous_frame_data.at[label, 'pos_y']
                    else:
                        # New topic: random spot away from the previous frame.
                        pos_x, pos_y = self.calculate_localtion(
                            previous_frame_data, 0.8, size_x, size_y)
                    placements.loc[label] = [pos_x, pos_y, size_x, size_y]

                    # Filled ellipse plus a black outline.
                    box = [pos_x - 0.5 * size_x, pos_y - 0.5 * size_y,
                           size_x, size_y]
                    pygame.draw.ellipse(screen, color, box)
                    pygame.draw.ellipse(screen, (0, 0, 0), box, 3)
                    self._draw_title(screen, get_font, text, pos_x, pos_y,
                                     size_x, text_color)

                # Timestamp in the lower-left corner, coloured to contrast
                # with the last (hottest) ellipse that was drawn.
                if text_color == (0, 0, 0):
                    bcolor = (255, 255, 255)
                else:
                    bcolor = (0, 0, 0)
                textImage = get_font(40).render(frame_time, True, text_color,
                                                bcolor)
                screen.blit(textImage, (4, self.screen_height - 44))

                previous_frame_data = placements
                pygame.display.update()
                self._record_frame(video)
                FPSClock.tick(self.fps)
        finally:
            # Always finalise the AVI, even when the window is closed early.
            video.release()
            pygame.quit()

    def _draw_intro(self, screen, video, clock, get_font):
        """Record 1.5 s of title card; the title is revealed left to right."""
        movie_start_text = '%s年%s月%s日微博热搜动态' % (
            self.date[:4], self.date[4:6], self.date[-2:])
        background = pygame.image.load(
            os.path.join(self.folder_path, 'movie_start.jpg'))
        textImage = get_font(120).render(movie_start_text, True,
                                         (255, 153, 0))
        text_size_x, text_size_y = textImage.get_size()
        # Centre the title on the screen.
        pos_x = 0.5 * (self.screen_width - text_size_x)
        pos_y = 0.5 * (self.screen_height - text_size_y)
        for i in range(0, int(1.5 * self.video_fps)):
            screen.blit(background, (0, 0))
            if i < self.video_fps:
                # During the first second only a growing part is visible.
                visible_area = pygame.Rect(
                    0, 0, text_size_x * i / self.video_fps, text_size_y)
            else:
                visible_area = pygame.Rect(0, 0, text_size_x, text_size_y)
            screen.blit(textImage, (pos_x, pos_y), visible_area)
            pygame.display.update()
            self._record_frame(video)
            clock.tick(self.fps)

    @staticmethod
    def _record_frame(video):
        """Grab the screen and append it to the video in OpenCV's BGR order."""
        im = ImageGrab.grab()
        video.write(cv2.cvtColor(np.array(im), cv2.COLOR_RGB2BGR))

    def _ellipse_style(self, hotness_number):
        """Return (fill colour, text colour, ellipse width) for a hotness."""
        def scaled_width():
            return round(
                math.sqrt(hotness_number / 25000000) * self.screen_width, 2)

        text_color = (0, 0, 0)
        if hotness_number < 1E4:
            color = (0, 128, 0)
            size_x = 40  # fixed width for the coldest topics
        elif hotness_number < 2E5:
            red = int(round(((hotness_number - 1E4) / 1.9E5) * 128))
            color = (red, 128, 0)
            size_x = scaled_width()
        elif hotness_number < 1E6:
            red = 127 + int(round(((hotness_number - 2E5) / 8E5) * 128))
            color = (red, 128, 0)
            size_x = scaled_width()
        elif hotness_number < 5E6:
            green = 128 - int(round(((hotness_number - 1E6) / 4E6) * 128))
            color = (255, green, 0)
            size_x = scaled_width()
        elif hotness_number < 1.5E7:
            red = 255 - int(round(((hotness_number - 5E6) / 1E7) * 128))
            color = (red, 0, 0)
            size_x = scaled_width()
        elif hotness_number < 2E8:
            red = 128 - int(round(((hotness_number - 1.5E7) / 1.85E8) * 128))
            color = (red, 0, 0)
            text_color = (255, 255, 255)  # dark fill needs white text
            size_x = round(
                (0.775 + math.sqrt(hotness_number / 2E8)) * self.screen_width,
                2)
        else:
            color = (0, 0, 0)
            text_color = (255, 255, 255)
            size_x = round(2 * self.screen_width, 2)
        return color, text_color, size_x

    @staticmethod
    def _split_title(text):
        """Split a title of 7+ characters into (top, centre, bottom) lines."""
        if len(text) < 7:
            return None, text, None
        split = int(math.ceil(len(text) * 3 / 7))  # length of the centre line
        first = (len(text) - split) // 2  # length of the top line
        return text[:first], text[first:first + split], text[first + split:]

    def _draw_title(self, screen, get_font, text, pos_x, pos_y, size_x,
                    text_color):
        """Write a topic title, on up to three lines, centred on its ellipse."""
        text_top, text_center, text_bottom = self._split_title(text)
        # The character size follows the ellipse width and the line length.
        if text_top is None:
            text_size = size_x / len(text)
        elif len(text) < 9:
            text_size = 0.6 * size_x / len(text_center)
        elif len(text) < 11:
            text_size = 0.7 * size_x / len(text_center)
        else:
            text_size = size_x / len(text_center)
        # Never ask for a font smaller than one pixel.
        my_font = get_font(max(1, int(round(text_size))))

        # Vertical offsets are expressed in multiples of the line height.
        for line, offset in ((text_top, -1.3), (text_bottom, 0.3),
                             (text_center, -0.5)):
            if line is None:
                continue
            textImage = my_font.render(line, True, text_color)
            text_size_x, text_size_y = textImage.get_size()
            screen.blit(textImage, (pos_x - 0.5 * text_size_x,
                                    pos_y + offset * text_size_y))

    def hotness_data_settle(self):
        '''
        Load the snapshots of the requested day (``starttime`` of the current
        day up to ``starttime`` of the next day), interpolate them to one
        column per 30 seconds and return the merged table.

        Each CSV row is read as alternating title / hotness columns and its
        index as an HHMM integer.

        Returns
        -------
        DataFrame
            One row per topic, one column per frame (named YYYYmmddHHMMSS),
            hotness values as cells and 0 where a topic is absent.

        '''
        try:
            data_current = pd.read_csv(
                os.path.join(self.folder_path, 'data%s.csv' % self.date),
                index_col=0)  # current day
            data_next = pd.read_csv(
                os.path.join(self.folder_path, 'data%s.csv' % self.next_date),
                index_col=0)  # following day
        except (OSError, ValueError) as e:
            # Missing, empty or unparsable data: nothing can be rendered.
            print(e)
            sys.exit(1)

        # Keep only the time span that belongs to this "day".
        data_current = data_current.loc[
            data_current.index >= int(self.starttime)]
        data_next = data_next.loc[data_next.index <= int(self.starttime)]
        # Normalise the index to YYYYmmddHHMM strings.
        data_current.index = [self.date + str(x).zfill(4)
                              for x in data_current.index]
        data_next.index = [self.next_date + str(x).zfill(4)
                           for x in data_next.index]
        # DataFrame.append no longer exists in pandas 2.x.
        all_data = pd.concat([data_current, data_next])

        segments = []
        print('开始进行数据整理合并!!!')
        for i in tqdm(range(1, len(all_data))):
            time_early = datetime.datetime.strptime(
                str(all_data.index[i - 1]), '%Y%m%d%H%M')
            time_latter = datetime.datetime.strptime(
                str(all_data.index[i]), '%Y%m%d%H%M')
            time_difference = time_latter - time_early
            frame_count = int(
                time_difference.total_seconds() / self.seconds_per_frame)
            if frame_count <= 0:
                # Duplicate or out-of-order snapshots yield no frame.
                continue
            time_list = [
                (time_early + datetime.timedelta(
                    seconds=self.seconds_per_frame * x)
                 ).strftime('%Y%m%d%H%M%S')
                for x in range(0, frame_count)]
            rows = self._interpolate_segment(
                self._topic_pairs(all_data.iloc[i - 1]),
                self._topic_pairs(all_data.iloc[i]),
                frame_count)
            segments.append(pd.DataFrame(list(rows.values()),
                                         index=list(rows.keys()),
                                         columns=time_list))

        if not segments:
            return pd.DataFrame()
        # A single outer join at the end avoids re-copying the growing table.
        all_frame_data = pd.concat(segments, axis=1, join='outer', sort=False)
        return all_frame_data.fillna(0)

    def _topic_pairs(self, row):
        """Return the (title, hotness) pairs of one snapshot row."""
        values = list(row)
        pair_count = min(self.topics_per_snapshot, len(values) // 2)
        pairs = [(values[2 * j], values[2 * j + 1]) for j in range(pair_count)]
        # Empty cells are read as NaN and carry no topic.
        return [(title, hotness) for title, hotness in pairs
                if not pd.isna(title)]

    @staticmethod
    def _interpolate_segment(pairs_early, pairs_latter, frame_count):
        """Interpolate hotness between two snapshots.

        Returns a dict mapping each title to ``frame_count`` values:
        topics present in both snapshots change linearly, topics that
        vanish fade to 0 during the first half (e.g. 10, 8, 6, 4, 2, 0, ...)
        and topics that appear grow from 0 during the second half.
        """
        middle_point = frame_count // 2 + 1
        latter_hotness = {}
        for title, hotness in pairs_latter:
            latter_hotness.setdefault(title, hotness)  # first occurrence wins

        rows = {}
        for title, hotness in pairs_early:
            if title in latter_hotness:
                step = (latter_hotness[title] - hotness) / float(frame_count)
                rows[title] = [hotness + x * step
                               for x in range(0, frame_count)]
            else:
                # hotness * (1 - 2x/n) reaches exactly 0 at the midpoint.
                head = [max(hotness * (1 - 2.0 * x / frame_count), 0)
                        for x in range(0, min(middle_point, frame_count))]
                rows[title] = head + [0] * (frame_count - len(head))

        for title, hotness in pairs_latter:
            if title in rows:
                continue
            step = 2.0 * hotness / frame_count
            tail = [x * step for x in range(1, middle_point - 1)]
            rows[title] = [0] * (frame_count - len(tail)) + tail
        return rows

    def calculate_localtion(self, data, space_level, size_x, size_y,
                            max_attempts=10):
        '''
        Pick a random centre for a new ellipse that keeps its distance from
        the ellipses already described by ``data``.

        Parameters
        ----------
        data : DataFrame
            Existing ellipses with columns pos_x, pos_y, size_x and size_y;
            rows that are still NaN are ignored.
        space_level : float
            Spacing factor: a candidate is rejected when it is closer than
            ``space_level * (existing size + new size)`` on both axes.
        size_x : float
            Width of the new ellipse.
        size_y : float
            Height of the new ellipse.
        max_attempts : int, optional
            Number of random candidates to try. The default is 10.

        Returns
        -------
        pos_x : float
            x coordinate of the new ellipse centre.
        pos_y : float
            y coordinate of the new ellipse centre.

        '''
        occupied = data[['pos_x', 'pos_y', 'size_x', 'size_y']].to_numpy(
            dtype=float)
        pos_x = pos_y = None
        for _ in range(max(1, max_attempts)):
            pos_x = round(random.uniform(self.screen_width * 0.1,
                                         self.screen_width * 0.9), 2)
            pos_y = round(random.uniform(self.screen_height * 0.1,
                                         self.screen_height * 0.9), 2)
            # Comparisons against NaN are False, so unset rows never collide.
            too_close = (
                (np.abs(pos_x - occupied[:, 0])
                 < space_level * np.abs(occupied[:, 2] + size_x))
                & (np.abs(pos_y - occupied[:, 1])
                   < space_level * np.abs(occupied[:, 3] + size_y)))
            if not too_close.any():
                break
        # When every attempt collided, the last candidate is used anyway.
        return pos_x, pos_y

    def add_BGM(self):
        '''
        Add the background music to the video written by hotness_video and
        export the result as ``VIDEO<date>X.mp4``.

        Returns
        -------
        None.

        '''
        video_path = os.path.join(self.folder_path,
                                  'VIDEO%s.avi' % self.date)
        BGM_path = os.path.join(
            self.folder_path,
            'BGM专题片纪录片常用流行音乐-大气恢弘有气质-公司介绍(Corp_爱给网_aigei_com.mp3')
        export_video_path = os.path.join(self.folder_path,
                                         'VIDEO%sX.mp4' % self.date)
        video = mpe.VideoFileClip(video_path)
        try:
            audio_clip = mpe.AudioFileClip(BGM_path)
            try:
                # Cut the music at the end of the video.
                dubbed = video.set_audio(audio_clip.set_end(video.duration))
                dubbed.write_videofile(export_video_path)
            finally:
                audio_clip.close()
        finally:
            # Release the reader processes opened by the clips.
            video.close()


if __name__ == '__main__':
    yesterday = datetime.datetime.today() + datetime.timedelta(-1)
    yesterday = yesterday.strftime('%Y%m%d')
    video = WeiboHotnessVideo(yesterday)
这样就完成了所有的工作,生成并保存了微博热搜数据随时间的变化趋势视频。
2021年12月30日微博热搜
好的,本期内容就是这样,感谢大家的关注,再见!!!
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)