数据一:[13098, 11129, 11105, 11157, 11021, 11018, 11028, 11157] 网状
数据二:[11194, 15090, 11018, 11028, 11009, 15076, 11147, 11105] 星状
数据三:[11106, 15018, 11020, 11012, 11018, 11212, 11016, 11014] 网状
数据四:[11131, 11194, 11106, 11012, 15018, 11228, 11013, 11014] 网状
数据五:[11012, 15018, 15148, 11011, 11013, 11028, 11018, 11014] 网状
以数据二为例
二、代码
1. 导入库
import pandas as pd import numpy as np import collections import pickle import networkx as nx import scipy.sparse as sparse import torch import collections import matplotlib.pyplot as plt import random ## 先实例化预定义函数 ## 预定义函数放入附录2. 通用数据读入
# --- General data loading ---
# Station ids, converted from the saved NumPy array into a plain Python list.
allnodes = np.load("../data/allnodes.npy").tolist()

# Station information table.
ifo = pd.read_csv("../data/info1.csv")

# Trip records.
bikes7 = pd.read_csv("../data/bikefor7days.csv")

# Ride-time table: the file is read without a header row, so the four column
# names are assigned explicitly afterwards.
ridetime = pd.read_csv("../data/ridetime.csv", header=None)
ridetime.columns = ["index", "station1", "station2", "time"]

# --- 3. Load the node set ---
# Station ids of the sub-network processed by the sections below
# (labelled as a star-shaped network in the original notes).
nodeset = [
    11194,
    15090,
    11018,
    11028,
    11009,
    15076,
    11147,
    11105,
]

# --- 4. Extract and save the ridetime data ---
# --- Restrict the ride-time table to the selected node set ---
# A row is kept only when BOTH of its stations belong to `nodeset`.
# `isin` performs the membership test column-wise; it selects the same rows
# as a row-by-row `apply` but without the per-row Python call.
ridetime["choose"] = (
    ridetime["station1"].isin(nodeset) & ridetime["station2"].isin(nodeset)
)

# `.loc` followed by a non-inplace `reset_index` yields an independent frame,
# so nothing is modified in place on a filtered slice of `ridetime`.
new_rt = ridetime.loc[
    ridetime["choose"], ["station1", "station2", "time"]
].reset_index(drop=True)

# Written without a header row; the renumbered index is still emitted as the
# first column. The target directory ("../data<first node id>/") must exist.
new_rt.to_csv("../data" + str(nodeset[0]) + "/ridetime.csv", header=None)

# --- 5. Build and save the station info (ifo) data ---
# --- Build the station info table (ifo_new) for the selected node set ---

# 1. Keep every trip that touches the node set at either end.
borrow_in_set = bikes7["借车站点"].isin(nodeset)
return_in_set = bikes7["还车站点"].isin(nodeset)
bikes7["choose"] = borrow_in_set | return_in_set
# `.copy()` so that adding the "inner" column below writes to an independent
# frame rather than to a slice of `bikes7`.
data = bikes7.loc[bikes7["choose"], ["借车站点", "还车站点"]].copy()

# 2. Flag "inner" trips: both the borrow and the return station are in the set.
data["inner"] = data["借车站点"].isin(nodeset) & data["还车站点"].isin(nodeset)

# 3. Total flow and inner flow per station. A station's flow covers trips that
#    start there AND trips that end there, so the counts of both columns are
#    accumulated (a plain assignment would keep only the last column's values).
inner = collections.defaultdict(int)
total = collections.defaultdict(int)
for column in ["借车站点", "还车站点"]:
    flags_by_station = data.groupby(column)["inner"]
    inner_counts = flags_by_station.sum()    # number of inner trips per station
    total_counts = flags_by_station.count()  # number of all kept trips per station
    for node in nodeset:
        # A station may be absent from one of the two columns.
        if node not in total_counts.index:
            continue
        inner[node] += int(inner_counts.loc[node])
        total[node] += int(total_counts.loc[node])

# 4. Scale each station's size by its inner-flow ratio and collect the rows.
ifo_new = pd.DataFrame(columns=ifo.columns)  # same header as `ifo`
for i, node in enumerate(nodeset):
    # NOTE(review): assumes the rows of `ifo` are in the same order as
    # `allnodes`, so a position in `allnodes` is a row label of `ifo` — confirm.
    idx = allnodes.index(node)
    ifo_new.loc[i] = ifo.loc[idx]
    # A station without any recorded trip gets ratio 0 instead of dividing by zero.
    p = inner[node] / total[node] if total[node] else 0.0
    ifo_new.loc[i, "size"] = int(ifo.loc[idx, "size"] * p) + 3

ifo_new = ifo_new[['station', 'jing', 'wei', 'name', 'distict', 'types', 'size']]
# Written without a header row; the target directory must already exist.
ifo_new.to_csv(
    "../data" + str(nodeset[0]) + "/ifo_" + str(nodeset[0]) + ".csv",
    header=None,
)

# --- 6. Record the borrow/return times of the nodes ---
# --- Per-hour borrow/return records for the selected node set ---

# 1. Flag inner trips (both stations belong to `nodeset`).
bikes7["inner_choose"] = (
    bikes7["借车站点"].isin(nodeset) & bikes7["还车站点"].isin(nodeset)
)
# Columns describing the borrow event (mode 0) and the return event (mode 1).
colu = [["借车站点", "j_d", "j_h", "j_m"], ["还车站点", "h_d", "h_h", "h_m"]]

# 2. Build one event table per mode ONCE; selecting and renaming the columns
#    does not depend on the day/hour, so it is hoisted out of the loops below.
inner_trips = bikes7[bikes7["inner_choose"]]
events = []
for i, columns in enumerate(colu):
    data = inner_trips[columns].copy()
    data.columns = ["station", "D", "H", "M"]
    data["mode"] = i
    events.append(data)

for day in range(20, 27):
    for hour in range(24):
        # Return events are placed before borrow events, matching the order in
        # which the frames were previously stacked.
        parts = []
        for data in reversed(events):
            in_slot = (data["D"] == day) & (data["H"] == hour)
            parts.append(data.loc[in_slot, ["station", "M", "mode"]])
        DF = pd.concat(parts, axis=0)

        # 3. Sort by column "M" (ascending) and renumber the index.
        DF = DF.sort_values(by="M", ascending=True).reset_index(drop=True)

        # Save. NOTE(review): the content is CSV although the file name ends
        # in ".npy"; the name is kept unchanged for existing readers.
        DF.to_csv("../data" + str(nodeset[0]) + "/" + str(day) + str(hour) + ".npy")

# --- 7. Build the flow matrices ---
# --- Flow (adjacency) matrices for the selected node set ---

# 1. Flag inner trips (both stations belong to `nodeset`).
bikes7["inner_choose"] = (
    bikes7["借车站点"].isin(nodeset) & bikes7["还车站点"].isin(nodeset)
)

# Borrow-hour windows; only the first and last element of each list are used
# (inclusive bounds), and the first element is part of the output file name.
# NOTE(review): the third window starts at 13, overlapping the second one and
# skipping 14 in its listing — [15, 16, 17, 18, 19] may have been intended.
# Kept as-is because changing it would rename the saved files.
periods = [[5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [13, 15, 16, 17, 18]]

# 3. Node <-> matrix index correspondence: the position in `nodeset`.
#    Saved once; it does not depend on the day or the period.
nodes = np.array(nodeset)
np.save("../data" + str(nodeset[0]) + "/nodes.npy", nodes)
# First occurrence wins, mirroring `list.index`.
node_index = {}
for position, node in enumerate(nodeset):
    node_index.setdefault(node, position)

for day in range(20, 27):
    data = bikes7[bikes7["inner_choose"] & (bikes7["j_d"] == day)]

    # 2. Select the period and count trips per (borrow, return) station pair.
    for period in periods:
        first_hour, last_hour = period[0], period[-1]
        data1 = data[data["j_h"].between(first_hour, last_hour)]
        # `size()` counts rows per group, i.e. the number of trips, without
        # depending on which other column happens to come first.
        data2 = (
            data1.groupby(["借车站点", "还车站点"]).size().reset_index(name="flow")
        )

        # 4. Fill the flow matrix: adj[origin, destination] = number of trips.
        adj = np.zeros(shape=(len(nodeset), len(nodeset)))
        for origin, destination, flow in data2.itertuples(index=False, name=None):
            adj[node_index[origin], node_index[destination]] = flow

        np.save(
            "../data" + str(nodeset[0]) + "/adjflow" + str(day) + str(first_hour) + ".npy",
            adj,
        )

# --- 8. Compute the demand and supply attributes ---
# --- Hourly demand and supply per station for the selected node set ---

# 1. Keep inner trips only (both stations belong to `nodeset`).
bikes7["inner"] = (
    bikes7["借车站点"].isin(nodeset) & bikes7["还车站点"].isin(nodeset)
)
b = bikes7[bikes7["inner"]]

demand_path = "../data" + str(nodeset[0]) + "/demand.txt"
supply_path = "../data" + str(nodeset[0]) + "/supply.txt"

# Both files are opened in append mode: delete them before re-running after a
# failed run, otherwise old records accumulate. `with` guarantees they are
# closed even when an iteration raises.
with open(demand_path, "a") as f1, open(supply_path, "a") as f2:
    # 2. Loop over every [day, hour] slot.
    for day in range(20, 27):
        for hour in range(0, 24):
            # NOTE(review): both demand and supply are selected by the BORROW
            # day/hour (j_d, j_h) — confirm this is intended for supply.
            in_slot = (b["j_d"] == day) & (b["j_h"] == hour)
            data = b.loc[in_slot, ["借车站点", "还车站点"]]

            # Trips leaving / arriving at each station, listed in `nodeset`
            # order with 0 for stations that have no trip in this slot.
            # `.tolist()` yields plain Python ints, so the text written below
            # does not depend on how NumPy prints its scalar types.
            demand = (
                data["借车站点"].value_counts().reindex(nodeset, fill_value=0).tolist()
            )
            supply = (
                data["还车站点"].value_counts().reindex(nodeset, fill_value=0).tolist()
            )

            # One record per line: "[day, hour][v1, v2, ...]".
            f1.write(str([day, hour]) + str(demand) + "\n")
            f2.write(str([day, hour]) + str(supply) + "\n")
注意:open(".txt", "a") 中的 "a" 表示追加模式(在已有内容之后继续写入)。因此,如果代码中途报错,需要先删除已生成的 txt 文件,再重新执行代码,以免错误数据累积。
——————————————————————————————————
# Appendix: predefined helper functions used by the data-preparation steps.


def get(x, y, vertexes):
    """Return True when at least one of ``x`` and ``y`` is in ``vertexes``."""
    return (x in vertexes) or (y in vertexes)


def get_inner(x, y, neib):
    """Return True when both ``x`` and ``y`` are in ``neib``."""
    return (x in neib) and (y in neib)


# Plot colour for each known station category label.
_CATEGORY_COLOURS = {
    "第一类边缘量小型": "pink",
    "第二类调度需求型": "blue",
    "第三类动态均衡型": "orange",
}


def modify_name(x):
    """Map a station category label to a colour name.

    The three known labels map to "pink", "blue" and "orange"; any other
    value maps to "red".
    """
    return _CATEGORY_COLOURS.get(x, "red")


def get_matrix(adj_file):
    """Load a SciPy sparse matrix from an ``.npz`` file as a dense float tensor.

    Args:
        adj_file: Path of a file written by ``scipy.sparse.save_npz``.

    Returns:
        A dense ``torch.float32`` tensor with the shape of the stored matrix.
    """
    # `load_npz` returns the matrix in whatever format it was saved in; only
    # the COO format exposes `.row` / `.col`, so convert explicitly.
    co_w = sparse.load_npz(adj_file).tocoo()
    indices = torch.as_tensor(np.vstack((co_w.row, co_w.col)), dtype=torch.long)
    values = torch.as_tensor(co_w.data, dtype=torch.float32)
    adjacency = torch.sparse_coo_tensor(indices, values, torch.Size(co_w.shape))
    return adjacency.to_dense()


def _state_hours(state):
    """Return the hours covered by ``state``: "morning", "noon" or anything else."""
    if state == "morning":
        return range(5, 10)
    if state == "noon":
        return range(10, 15)
    return range(15, 20)


def _resolve_vertexes(g, vertexes):
    """Pick the vertices to scan: explicit argument, module global, or keys of ``g``."""
    if vertexes is not None:
        return vertexes
    # Backward compatibility: callers used to provide a module-level
    # `vertexes` variable instead of an argument.
    module_level = globals().get("vertexes")
    if module_level is not None:
        return module_level
    return list(g)


def get_MX(g, state, vertexes=None):
    """Count, per vertex, how often each neighbour recurs across the hours of ``state``.

    Args:
        g: Mapping ``vertex -> {hour -> iterable of neighbour vertices}``.
        state: "morning" (hours 5-9), "noon" (10-14); any other value
            selects hours 15-19.
        vertexes: Vertices to scan. When omitted, a module-level ``vertexes``
            variable is used if one exists, otherwise every key of ``g``.

    Returns:
        ``defaultdict(dict)`` mapping ``v -> {w: count}``. Pairs seen exactly
        once are removed, so a vertex may be left with an empty dict.
    """
    hours = _state_hours(state)
    MG = collections.defaultdict(dict)  # overlap counts
    for v in _resolve_vertexes(g, vertexes):
        for t in hours:
            if t not in g[v]:  # vertex v has no record at hour t
                continue
            for w in g[v][t]:
                MG[v][w] = MG[v].get(w, 0) + 1

    # Drop the pairs that were seen exactly once.
    for v in MG:
        for w in list(MG[v]):
            if MG[v][w] == 1:
                del MG[v][w]
    return MG


def get_Counter(g, state, mode="jie", vertexes=None):
    """Count origin-destination pairs across the hours of ``state``.

    Args:
        g: Mapping ``vertex -> {hour -> iterable of neighbour vertices}``.
        state: "morning" (hours 5-9), "noon" (10-14); any other value
            selects hours 15-19.
        mode: "jie" records pairs as ``(v, w)``; any other value records
            them as ``(w, v)``.
        vertexes: Vertices to scan. When omitted, a module-level ``vertexes``
            variable is used if one exists, otherwise every key of ``g``.

    Returns:
        ``collections.Counter`` of the pairs, without the pairs counted
        exactly once.
    """
    hours = _state_hours(state)
    MG = collections.Counter()
    for v in _resolve_vertexes(g, vertexes):
        for t in hours:
            if t not in g[v]:  # vertex v has no record at hour t
                continue
            if mode == "jie":
                MG.update((v, w) for w in g[v][t])
            else:
                MG.update((w, v) for w in g[v][t])

    # Drop the pairs that were counted exactly once.
    for k in list(MG):
        if MG[k] == 1:
            del MG[k]
    return MG
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)