--日粒度维表 drop table dim.dim_date; CREATE TABLE IF NOT EXISTS dim.dim_date( `datetimestr` STRING COMMENT '当前时间(yyyy-MM-dd HH:mm:ss)' ,`yearstr` STRING COMMENT '当前年份' ,`quarterid` BIGINT COMMENT '当前季度' ,`quarter_cn` STRING COMMENT '当前季度中文' ,`monthstr` STRING COMMENT '当前月份' ,`weekofyearid` BIGINT COMMENT '当年第几周' ,`dayofyearid` BIGINT COMMENT '当年第几天' ,`dayofmonthstr` STRING COMMENT '当月日期(dd)' ,`dayofweekid` BIGINT COMMENT '当前周几(0-6)' ,`dayofweek_cn` STRING COMMENT '当前周中文' --,`hourstr` STRING COMMENT '当前小时(HH)' --,`minutestr` STRING COMMENT '当前分钟(mm)' ,`workday_flag` BIGINT COMMENT '是否为工作日(1,是;0,不是)' ,`create_time` STRING COMMENT '数据创建日期(yyyy-MM-dd HH:mm:ss.SSS)' ) COMMENT '日期粒度度维表' --PARTITIonED BY(yearstr STRING COMMENT '当前年分区(yyyy)',monthstr STRING COMMENT '当前月分区(MM)') ROW FORMAT DELIMITED FIELDS TERMINATED BY 't' NULL DEFINED AS '' STORED AS ORC TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false'); --日分钟粒度维表 drop table dim.dim_datetime; CREATE TABLE IF NOT EXISTS dim.dim_datetime( `timestr` STRING COMMENT '当前时间(HH:mm:ss)' ,`hourstr` STRING COMMENT '当前小时(HH)' ,`minutestr` STRING COMMENT '当前分钟(mm)' ) COMMENT '日分钟粒度维表' ROW FORMAT DELIMITED FIELDS TERMINATED BY 't' NULL DEFINED AS '' STORED AS ORC TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');生成数据
--生成日粒度维表数据 WITH datetime_t AS( SELECT FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr FROM ( SELECt POSEXPLODE( SPLIT( REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT)) ,'o' ) ) ) idx_t ), date_t AS ( select date_add('2000-01-01',idx_t.pos) as datetimestr from ( select posexplode( split( repeat('o',datediff(from_unixtime(unix_timestamp('2040-12-31 00:00:00'),'yyyy-MM-dd'), '2000-01-01')) ,'o' ) ) ) idx_t ), tmp AS( select datetimestr AS datetimestr ,substr(datetimestr,1,4) AS yearstr ,CASE WHEN MONTH(datetimestr) <= 3 then 1 WHEN MONTH(datetimestr) <= 6 then 2 WHEN MONTH(datetimestr) <= 9 then 3 ELSE 4 END AS quarterid ,substr(datetimestr,6,2) AS monthstr ,WEEKOFYEAR(datetimestr) AS weekofyearid ,DATEDIFF(datetimestr,CONCAT(YEAR(datetimestr),'-01-01')) + 1 AS dayofyearid ,substr(datetimestr,9,2) AS dayofmonthstr ,PMOD(DATEDIFF(datetimestr, '2000-01-01')-1, 7) AS dayofweekid --,substr(datetimestr,12,2) AS hourstr --,substr(datetimestr,15,2) AS minutestr FROM date_t ) INSERT OVERWRITE TABLE dim.dim_date --PARTITION(yearstr,monthstr) SELECt datetimestr ,yearstr ,quarterid ,CASE WHEN quarterid = 1 then '第一季度' WHEN quarterid = 2 then '第二季度' WHEN quarterid = 3 then '第三季度' WHEN quarterid = 4 then '第四季度' END AS quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,CASE WHEN dayofweekid = 0 then '星期日' WHEN dayofweekid = 1 then '星期一' WHEN dayofweekid = 2 then '星期二' WHEN dayofweekid = 3 then '星期三' WHEN dayofweekid = 4 then '星期四' WHEN dayofweekid = 5 then '星期五' WHEN dayofweekid = 6 then '星期六' END AS dayofweek_cn --,hourstr --,minutestr ,CASE WHEN dayofweekid = 0 or dayofweekid = 6 then 0 ELSE 1 END AS workday_flag ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time --,yearstr --,monthstr FROM tmp; --生成分钟粒度维表数据 WITH datetime_t AS( SELECt FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr FROM ( SELECt POSEXPLODE( SPLIT( REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT)) ,'o' ) ) ) idx_t ), tmp AS( select substr(datetimestr,12) AS timestr ,substr(datetimestr,12,2) AS hourstr ,substr(datetimestr,15,2) AS minutestr FROM datetime_t ) INSERT OVERWRITE TABLE dim.dim_datetime select timestr ,hourstr ,minutestr from tmp变更节假日 变更节假日sql
INSERT INTO TABLE dim.dim_date SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,0 AS workday_flag --变更为非工作日 --,1 AS workday_flag --变更为工作日 ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time FROM dim.dim_date where yearstr='2000' and monthstr='01' and dayofmonthstr='31';变更节假日脚本 变更节假日脚本(单个)
#/bin/bash env #update_workday_flag.sh USAGE="Usage:#/bin/bash env #update_workday_flag_batch.sh USAGE="agrs: [yearstr] [monthstr] [dayofmonthstr] [workday_flag] e.g: 2000 01 01 0" function generate_sql(){ if [[ $# -lt 4 ]] ; then echo "Invalid arguments !!!" echo "${USAGE}"; exit 1; fi SQL_UPDATE="INSERT INTO TABLE cdm.dim_date SELECT datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn , AS workday_flag ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time FROM cdm.dim_date where yearstr='' and monthstr='' and dayofmonthstr=''" SQL_QUERY="SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag ,create_time FROM cdm.dim_date where yearstr='' and monthstr='' and dayofmonthstr=''" } for line in `cat days_*.csv` do OLD_IFS="${IFS}" IFS="," ARR=(${line}) IFS="{OLD_IFS}" generate_sql ${ARR[@]} SQL="${SQL};${SQL_UPDATe};${SQL_QUERY}" done beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm -e "${SQL}"[yearstr] [monthstr] [dayofmonthstr] [workday_flag] e.g:tee days_2022.csv <变更节假日脚本(批量)2000 01 01 0" function main(){ if [[ $# -lt 4 ]] ; then echo "${USAGE}"; exit 1; fi SQL_UPDATE="INSERT INTO TABLE cdm.dim_date SELECT datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn , AS workday_flag ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time FROM cdm.dim_date where yearstr='' and monthstr='' and dayofmonthstr=''" SQL_QUERY="SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag ,create_time FROM cdm.dim_date where yearstr='' and monthstr='' and dayofmonthstr=''" beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm -e "${SQL_UPDATe};${SQL_QUERY}" } main "$@" --使用维表(日期粒度) WITH tmp AS( SELECT datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num FROM dim.dim_date --WHERe yearstr='yyyy' and monthstr='MM' WHERe yearstr='2000' and monthstr='01' ) SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag FROM tmp WHERe num=1 --使用维表(小时粒度) WITH tmp AS( SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num FROM dim.dim_date --WHERe yearstr='yyyy' and monthstr='MM' WHERe yearstr='2000' and monthstr='01' ), date_t AS( SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag FROM tmp WHERe num=1 ), datetime_t AS( SELECt hourstr FROM dim.dim_datetime group by hourstr ) select datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,b.weekofyearid ,b.dayofyearid ,b.dayofmonthstr ,b.dayofweekid ,b.dayofweek_cn ,a.hourstr ,b.workday_flag from datetime_t a,date_t b --使用维表(分钟粒度) WITH tmp AS( SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num FROM dim.dim_date --WHERe yearstr='yyyy' and monthstr='MM' WHERe yearstr='2000' and monthstr='01' ), date_t AS( SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag FROM tmp WHERe num=1 ) select concat(b.datetimestr," ",a.timestr) AS datetimestr ,b.yearstr ,b.quarterid ,b.quarter_cn ,b.monthstr ,b.weekofyearid ,b.dayofyearid ,b.dayofmonthstr ,b.dayofweekid ,b.dayofweek_cn ,a.hourstr ,a.minutestr ,b.workday_flag from dim.dim_datetime a,date_t b变更日期数据
使用日期维表欢迎分享,转载请注明来源:内存溢出
评论列表(0条)