Error[8]: Undefined offset: 7, File: /www/wwwroot/outofmemory.cn/tmp/plugin_ss_superseo_model_superseo.php, Line: 121
File: /www/wwwroot/outofmemory.cn/tmp/plugin_ss_superseo_model_superseo.php, Line: 473, decode(

Hive生成超级日期维度表 创建日期维表
--日粒度维表
drop table dim.dim_date;
CREATE TABLE IF NOT EXISTS dim.dim_date(
     `datetimestr`    STRING COMMENT '当前时间(yyyy-MM-dd HH:mm:ss)'
    ,`yearstr`        STRING COMMENT '当前年份'
    ,`quarterid`      BIGINT COMMENT '当前季度'
    ,`quarter_cn`     STRING COMMENT '当前季度中文'
    ,`monthstr`       STRING COMMENT '当前月份'
    ,`weekofyearid`   BIGINT COMMENT '当年第几周'
    ,`dayofyearid`    BIGINT COMMENT '当年第几天'
    ,`dayofmonthstr`  STRING COMMENT '当月日期(dd)'
    ,`dayofweekid`    BIGINT COMMENT '当前周几(0-6)'
    ,`dayofweek_cn`   STRING COMMENT '当前周中文'
    --,`hourstr`        STRING COMMENT '当前小时(HH)'
    --,`minutestr`      STRING COMMENT '当前分钟(mm)'
    ,`workday_flag`   BIGINT COMMENT '是否为工作日(1,是;0,不是)'
    ,`create_time`    STRING COMMENT '数据创建日期(yyyy-MM-dd HH:mm:ss.SSS)'
    )
COMMENT '日期粒度度维表'
--PARTITIonED BY(yearstr STRING COMMENT '当前年分区(yyyy)',monthstr STRING COMMENT '当前月分区(MM)')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY 't'
    NULL DEFINED AS ''
    STORED AS ORC
    TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
    
--日分钟粒度维表
drop table dim.dim_datetime;
CREATE TABLE IF NOT EXISTS dim.dim_datetime(
     `timestr`        STRING COMMENT '当前时间(HH:mm:ss)'
    ,`hourstr`        STRING COMMENT '当前小时(HH)'
    ,`minutestr`      STRING COMMENT '当前分钟(mm)'
    )
COMMENT '日分钟粒度维表'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY 't'
    NULL DEFINED AS ''
    STORED AS ORC
    TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
生成数据
--生成日粒度维表数据
WITH datetime_t AS(
SELECT FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
       (
           SELECt  
               POSEXPLODE(
                   SPLIT(
                       REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
                       ,'o'
                   )
               )
       ) idx_t
),
date_t AS (
select date_add('2000-01-01',idx_t.pos) as datetimestr
from
       (
           select  
               posexplode(
                   split(
                       repeat('o',datediff(from_unixtime(unix_timestamp('2040-12-31 00:00:00'),'yyyy-MM-dd'), '2000-01-01'))
                       ,'o'
                   )
               )
       ) idx_t
),
tmp AS(
select
       datetimestr                                                    AS datetimestr
      ,substr(datetimestr,1,4)                                        AS yearstr
      ,CASE  WHEN MONTH(datetimestr) <= 3  then 1
             WHEN MONTH(datetimestr) <= 6  then 2
             WHEN MONTH(datetimestr) <= 9  then 3
             ELSE 4
       END                                                            AS quarterid
      ,substr(datetimestr,6,2)                                        AS monthstr
      ,WEEKOFYEAR(datetimestr)                                        AS weekofyearid
      ,DATEDIFF(datetimestr,CONCAT(YEAR(datetimestr),'-01-01')) + 1   AS dayofyearid
      ,substr(datetimestr,9,2)                                        AS dayofmonthstr
      ,PMOD(DATEDIFF(datetimestr, '2000-01-01')-1, 7)                 AS dayofweekid
      --,substr(datetimestr,12,2)                                       AS hourstr
      --,substr(datetimestr,15,2)                                       AS minutestr
FROM  date_t
)
INSERT OVERWRITE TABLE dim.dim_date --PARTITION(yearstr,monthstr)
SELECt
       datetimestr
      ,yearstr
      ,quarterid
      ,CASE  WHEN quarterid = 1   then '第一季度'
             WHEN quarterid = 2   then '第二季度'
             WHEN quarterid = 3   then '第三季度'
             WHEN quarterid = 4   then '第四季度'
       END                                                            AS quarter_cn
      ,monthstr
      ,weekofyearid
      ,dayofyearid
      ,dayofmonthstr
      ,dayofweekid
      ,CASE  WHEN dayofweekid = 0  then '星期日'
             WHEN dayofweekid = 1  then '星期一'
             WHEN dayofweekid = 2  then '星期二'
             WHEN dayofweekid = 3  then '星期三'
             WHEN dayofweekid = 4  then '星期四'
             WHEN dayofweekid = 5  then '星期五'
             WHEN dayofweekid = 6  then '星期六'
       END                                                            AS dayofweek_cn
      --,hourstr
      --,minutestr
      ,CASE  WHEN dayofweekid = 0 or dayofweekid = 6 then 0
             ELSE 1
       END                                                            AS workday_flag
      ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                  AS create_time
      --,yearstr
      --,monthstr
FROM  tmp;

--生成分钟粒度维表数据
WITH datetime_t AS(
SELECt FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
       (
           SELECt  
               POSEXPLODE(
                   SPLIT(
                       REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
                       ,'o'
                   )
               )
       ) idx_t
),
tmp AS(
select
       substr(datetimestr,12)                                         AS timestr
      ,substr(datetimestr,12,2)                                       AS hourstr
      ,substr(datetimestr,15,2)                                       AS minutestr
FROM  datetime_t
)
INSERT OVERWRITE TABLE dim.dim_datetime
select 
       timestr
      ,hourstr
      ,minutestr
from  tmp 
变更节假日 变更节假日sql
INSERT INTO TABLE dim.dim_date 
SELECt 
        datetimestr  
       ,yearstr
       ,quarterid           
       ,quarter_cn   
       ,monthstr       
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,0 AS workday_flag --变更为非工作日
       --,1 AS workday_flag --变更为工作日
       ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
FROM   dim.dim_date 
where  yearstr='2000' and monthstr='01' and dayofmonthstr='31';
变更节假日脚本 变更节假日脚本(单个)
#/bin/bash env
#update_workday_flag.sh

USAGE="Usage: 
#/bin/bash env
#update_workday_flag_batch.sh

USAGE="agrs:  [yearstr] [monthstr] [dayofmonthstr] [workday_flag]
       e.g:   2000 01 01 0"

function generate_sql(){

    if [[ $# -lt 4 ]] ; then
        echo "Invalid arguments !!!"
        echo "${USAGE}";
        exit 1;
    fi
    
    SQL_UPDATE="INSERT INTO TABLE cdm.dim_date 
         SELECT 
             datetimestr  
            ,yearstr
            ,quarterid           
            ,quarter_cn   
            ,monthstr       
            ,weekofyearid
            ,dayofyearid 
            ,dayofmonthstr
            ,dayofweekid 
            ,dayofweek_cn
            , AS workday_flag
            ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
        FROM   cdm.dim_date 
        where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    SQL_QUERY="SELECt 
         datetimestr  
        ,yearstr
        ,quarterid           
        ,quarter_cn   
        ,monthstr       
        ,weekofyearid
        ,dayofyearid 
        ,dayofmonthstr
        ,dayofweekid 
        ,dayofweek_cn
        ,workday_flag
        ,create_time
    FROM   cdm.dim_date 
    where  yearstr='' and monthstr='' and dayofmonthstr=''"
}

for line in `cat days_*.csv`
do  
    OLD_IFS="${IFS}"
    IFS=","
    ARR=(${line})
    IFS="{OLD_IFS}"
    generate_sql ${ARR[@]}
    SQL="${SQL};${SQL_UPDATe};${SQL_QUERY}" 
done

beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm  -e "${SQL}" 
[yearstr] [monthstr] [dayofmonthstr] [workday_flag] e.g:
tee days_2022.csv < 2000 01 01 0"

function main(){
    if [[ $# -lt 4 ]] ; then
        echo "${USAGE}";
        exit 1;
    fi
    
    SQL_UPDATE="INSERT INTO TABLE cdm.dim_date 
         SELECT 
             datetimestr  
            ,yearstr
            ,quarterid           
            ,quarter_cn   
            ,monthstr       
            ,weekofyearid
            ,dayofyearid 
            ,dayofmonthstr
            ,dayofweekid 
            ,dayofweek_cn
            , AS workday_flag
            ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
        FROM   cdm.dim_date 
        where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    SQL_QUERY="SELECt 
         datetimestr  
        ,yearstr
        ,quarterid           
        ,quarter_cn   
        ,monthstr       
        ,weekofyearid
        ,dayofyearid 
        ,dayofmonthstr
        ,dayofweekid 
        ,dayofweek_cn
        ,workday_flag
        ,create_time
    FROM   cdm.dim_date 
    where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm  -e "${SQL_UPDATe};${SQL_QUERY}" 

}

main "$@"
变更节假日脚本(批量)
--使用维表(日期粒度)
WITH tmp AS(
SELECT 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
)
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1

--使用维表(小时粒度)
WITH tmp AS(
SELECt 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
),
date_t AS(
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1
),
datetime_t AS(
SELECt 
        hourstr
FROM   dim.dim_datetime 
group by hourstr
)
select 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,b.weekofyearid
       ,b.dayofyearid
       ,b.dayofmonthstr
       ,b.dayofweekid
       ,b.dayofweek_cn
       ,a.hourstr
       ,b.workday_flag
from   datetime_t a,date_t b

--使用维表(分钟粒度)
WITH tmp AS(
SELECt 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
),
date_t AS(
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1
)
select 
        concat(b.datetimestr," ",a.timestr)  AS datetimestr
       ,b.yearstr      
       ,b.quarterid   
       ,b.quarter_cn  
       ,b.monthstr     
       ,b.weekofyearid
       ,b.dayofyearid
       ,b.dayofmonthstr
       ,b.dayofweekid
       ,b.dayofweek_cn
       ,a.hourstr
       ,a.minutestr
       ,b.workday_flag
from   dim.dim_datetime a,date_t b

变更日期数据

[+++] 使用日期维表 [+++])
File: /www/wwwroot/outofmemory.cn/tmp/route_read.php, Line: 126, InsideLink()
File: /www/wwwroot/outofmemory.cn/tmp/index.inc.php, Line: 165, include(/www/wwwroot/outofmemory.cn/tmp/route_read.php)
File: /www/wwwroot/outofmemory.cn/index.php, Line: 30, include(/www/wwwroot/outofmemory.cn/tmp/index.inc.php)
Error[8]: Undefined offset: 8, File: /www/wwwroot/outofmemory.cn/tmp/plugin_ss_superseo_model_superseo.php, Line: 121
File: /www/wwwroot/outofmemory.cn/tmp/plugin_ss_superseo_model_superseo.php, Line: 473, decode(

Hive生成超级日期维度表 创建日期维表
--日粒度维表
drop table dim.dim_date;
CREATE TABLE IF NOT EXISTS dim.dim_date(
     `datetimestr`    STRING COMMENT '当前时间(yyyy-MM-dd HH:mm:ss)'
    ,`yearstr`        STRING COMMENT '当前年份'
    ,`quarterid`      BIGINT COMMENT '当前季度'
    ,`quarter_cn`     STRING COMMENT '当前季度中文'
    ,`monthstr`       STRING COMMENT '当前月份'
    ,`weekofyearid`   BIGINT COMMENT '当年第几周'
    ,`dayofyearid`    BIGINT COMMENT '当年第几天'
    ,`dayofmonthstr`  STRING COMMENT '当月日期(dd)'
    ,`dayofweekid`    BIGINT COMMENT '当前周几(0-6)'
    ,`dayofweek_cn`   STRING COMMENT '当前周中文'
    --,`hourstr`        STRING COMMENT '当前小时(HH)'
    --,`minutestr`      STRING COMMENT '当前分钟(mm)'
    ,`workday_flag`   BIGINT COMMENT '是否为工作日(1,是;0,不是)'
    ,`create_time`    STRING COMMENT '数据创建日期(yyyy-MM-dd HH:mm:ss.SSS)'
    )
COMMENT '日期粒度度维表'
--PARTITIonED BY(yearstr STRING COMMENT '当前年分区(yyyy)',monthstr STRING COMMENT '当前月分区(MM)')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY 't'
    NULL DEFINED AS ''
    STORED AS ORC
    TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
    
--日分钟粒度维表
drop table dim.dim_datetime;
CREATE TABLE IF NOT EXISTS dim.dim_datetime(
     `timestr`        STRING COMMENT '当前时间(HH:mm:ss)'
    ,`hourstr`        STRING COMMENT '当前小时(HH)'
    ,`minutestr`      STRING COMMENT '当前分钟(mm)'
    )
COMMENT '日分钟粒度维表'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY 't'
    NULL DEFINED AS ''
    STORED AS ORC
    TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
生成数据
--生成日粒度维表数据
WITH datetime_t AS(
SELECT FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
       (
           SELECt  
               POSEXPLODE(
                   SPLIT(
                       REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
                       ,'o'
                   )
               )
       ) idx_t
),
date_t AS (
select date_add('2000-01-01',idx_t.pos) as datetimestr
from
       (
           select  
               posexplode(
                   split(
                       repeat('o',datediff(from_unixtime(unix_timestamp('2040-12-31 00:00:00'),'yyyy-MM-dd'), '2000-01-01'))
                       ,'o'
                   )
               )
       ) idx_t
),
tmp AS(
select
       datetimestr                                                    AS datetimestr
      ,substr(datetimestr,1,4)                                        AS yearstr
      ,CASE  WHEN MONTH(datetimestr) <= 3  then 1
             WHEN MONTH(datetimestr) <= 6  then 2
             WHEN MONTH(datetimestr) <= 9  then 3
             ELSE 4
       END                                                            AS quarterid
      ,substr(datetimestr,6,2)                                        AS monthstr
      ,WEEKOFYEAR(datetimestr)                                        AS weekofyearid
      ,DATEDIFF(datetimestr,CONCAT(YEAR(datetimestr),'-01-01')) + 1   AS dayofyearid
      ,substr(datetimestr,9,2)                                        AS dayofmonthstr
      ,PMOD(DATEDIFF(datetimestr, '2000-01-01')-1, 7)                 AS dayofweekid
      --,substr(datetimestr,12,2)                                       AS hourstr
      --,substr(datetimestr,15,2)                                       AS minutestr
FROM  date_t
)
INSERT OVERWRITE TABLE dim.dim_date --PARTITION(yearstr,monthstr)
SELECt
       datetimestr
      ,yearstr
      ,quarterid
      ,CASE  WHEN quarterid = 1   then '第一季度'
             WHEN quarterid = 2   then '第二季度'
             WHEN quarterid = 3   then '第三季度'
             WHEN quarterid = 4   then '第四季度'
       END                                                            AS quarter_cn
      ,monthstr
      ,weekofyearid
      ,dayofyearid
      ,dayofmonthstr
      ,dayofweekid
      ,CASE  WHEN dayofweekid = 0  then '星期日'
             WHEN dayofweekid = 1  then '星期一'
             WHEN dayofweekid = 2  then '星期二'
             WHEN dayofweekid = 3  then '星期三'
             WHEN dayofweekid = 4  then '星期四'
             WHEN dayofweekid = 5  then '星期五'
             WHEN dayofweekid = 6  then '星期六'
       END                                                            AS dayofweek_cn
      --,hourstr
      --,minutestr
      ,CASE  WHEN dayofweekid = 0 or dayofweekid = 6 then 0
             ELSE 1
       END                                                            AS workday_flag
      ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                  AS create_time
      --,yearstr
      --,monthstr
FROM  tmp;

--生成分钟粒度维表数据
WITH datetime_t AS(
SELECt FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
       (
           SELECt  
               POSEXPLODE(
                   SPLIT(
                       REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
                       ,'o'
                   )
               )
       ) idx_t
),
tmp AS(
select
       substr(datetimestr,12)                                         AS timestr
      ,substr(datetimestr,12,2)                                       AS hourstr
      ,substr(datetimestr,15,2)                                       AS minutestr
FROM  datetime_t
)
INSERT OVERWRITE TABLE dim.dim_datetime
select 
       timestr
      ,hourstr
      ,minutestr
from  tmp 
变更节假日 变更节假日sql
INSERT INTO TABLE dim.dim_date 
SELECt 
        datetimestr  
       ,yearstr
       ,quarterid           
       ,quarter_cn   
       ,monthstr       
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,0 AS workday_flag --变更为非工作日
       --,1 AS workday_flag --变更为工作日
       ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
FROM   dim.dim_date 
where  yearstr='2000' and monthstr='01' and dayofmonthstr='31';
变更节假日脚本 变更节假日脚本(单个)
#/bin/bash env
#update_workday_flag.sh

USAGE="Usage: 
#/bin/bash env
#update_workday_flag_batch.sh

USAGE="agrs:  [yearstr] [monthstr] [dayofmonthstr] [workday_flag]
       e.g:   2000 01 01 0"

function generate_sql(){

    if [[ $# -lt 4 ]] ; then
        echo "Invalid arguments !!!"
        echo "${USAGE}";
        exit 1;
    fi
    
    SQL_UPDATE="INSERT INTO TABLE cdm.dim_date 
         SELECT 
             datetimestr  
            ,yearstr
            ,quarterid           
            ,quarter_cn   
            ,monthstr       
            ,weekofyearid
            ,dayofyearid 
            ,dayofmonthstr
            ,dayofweekid 
            ,dayofweek_cn
            , AS workday_flag
            ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
        FROM   cdm.dim_date 
        where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    SQL_QUERY="SELECt 
         datetimestr  
        ,yearstr
        ,quarterid           
        ,quarter_cn   
        ,monthstr       
        ,weekofyearid
        ,dayofyearid 
        ,dayofmonthstr
        ,dayofweekid 
        ,dayofweek_cn
        ,workday_flag
        ,create_time
    FROM   cdm.dim_date 
    where  yearstr='' and monthstr='' and dayofmonthstr=''"
}

for line in `cat days_*.csv`
do  
    OLD_IFS="${IFS}"
    IFS=","
    ARR=(${line})
    IFS="{OLD_IFS}"
    generate_sql ${ARR[@]}
    SQL="${SQL};${SQL_UPDATe};${SQL_QUERY}" 
done

beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm  -e "${SQL}" 
[yearstr] [monthstr] [dayofmonthstr] [workday_flag] e.g:
tee days_2022.csv < 2000 01 01 0"

function main(){
    if [[ $# -lt 4 ]] ; then
        echo "${USAGE}";
        exit 1;
    fi
    
    SQL_UPDATE="INSERT INTO TABLE cdm.dim_date 
         SELECT 
             datetimestr  
            ,yearstr
            ,quarterid           
            ,quarter_cn   
            ,monthstr       
            ,weekofyearid
            ,dayofyearid 
            ,dayofmonthstr
            ,dayofweekid 
            ,dayofweek_cn
            , AS workday_flag
            ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
        FROM   cdm.dim_date 
        where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    SQL_QUERY="SELECt 
         datetimestr  
        ,yearstr
        ,quarterid           
        ,quarter_cn   
        ,monthstr       
        ,weekofyearid
        ,dayofyearid 
        ,dayofmonthstr
        ,dayofweekid 
        ,dayofweek_cn
        ,workday_flag
        ,create_time
    FROM   cdm.dim_date 
    where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm  -e "${SQL_UPDATe};${SQL_QUERY}" 

}

main "$@"
变更节假日脚本(批量)
--使用维表(日期粒度)
WITH tmp AS(
SELECT 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
)
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1

--使用维表(小时粒度)
WITH tmp AS(
SELECt 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
),
date_t AS(
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1
),
datetime_t AS(
SELECt 
        hourstr
FROM   dim.dim_datetime 
group by hourstr
)
select 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,b.weekofyearid
       ,b.dayofyearid
       ,b.dayofmonthstr
       ,b.dayofweekid
       ,b.dayofweek_cn
       ,a.hourstr
       ,b.workday_flag
from   datetime_t a,date_t b

--使用维表(分钟粒度)
WITH tmp AS(
SELECt 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
),
date_t AS(
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1
)
select 
        concat(b.datetimestr," ",a.timestr)  AS datetimestr
       ,b.yearstr      
       ,b.quarterid   
       ,b.quarter_cn  
       ,b.monthstr     
       ,b.weekofyearid
       ,b.dayofyearid
       ,b.dayofmonthstr
       ,b.dayofweekid
       ,b.dayofweek_cn
       ,a.hourstr
       ,a.minutestr
       ,b.workday_flag
from   dim.dim_datetime a,date_t b

变更日期数据

使用日期维表 [+++])
File: /www/wwwroot/outofmemory.cn/tmp/route_read.php, Line: 126, InsideLink()
File: /www/wwwroot/outofmemory.cn/tmp/index.inc.php, Line: 165, include(/www/wwwroot/outofmemory.cn/tmp/route_read.php)
File: /www/wwwroot/outofmemory.cn/index.php, Line: 30, include(/www/wwwroot/outofmemory.cn/tmp/index.inc.php)
Hive生成超级日期维度表_随笔_内存溢出

Hive生成超级日期维度表

Hive生成超级日期维度表,第1张

Hive生成超级日期维度表 创建日期维表
--日粒度维表
drop table dim.dim_date;
CREATE TABLE IF NOT EXISTS dim.dim_date(
     `datetimestr`    STRING COMMENT '当前时间(yyyy-MM-dd HH:mm:ss)'
    ,`yearstr`        STRING COMMENT '当前年份'
    ,`quarterid`      BIGINT COMMENT '当前季度'
    ,`quarter_cn`     STRING COMMENT '当前季度中文'
    ,`monthstr`       STRING COMMENT '当前月份'
    ,`weekofyearid`   BIGINT COMMENT '当年第几周'
    ,`dayofyearid`    BIGINT COMMENT '当年第几天'
    ,`dayofmonthstr`  STRING COMMENT '当月日期(dd)'
    ,`dayofweekid`    BIGINT COMMENT '当前周几(0-6)'
    ,`dayofweek_cn`   STRING COMMENT '当前周中文'
    --,`hourstr`        STRING COMMENT '当前小时(HH)'
    --,`minutestr`      STRING COMMENT '当前分钟(mm)'
    ,`workday_flag`   BIGINT COMMENT '是否为工作日(1,是;0,不是)'
    ,`create_time`    STRING COMMENT '数据创建日期(yyyy-MM-dd HH:mm:ss.SSS)'
    )
COMMENT '日期粒度度维表'
--PARTITIonED BY(yearstr STRING COMMENT '当前年分区(yyyy)',monthstr STRING COMMENT '当前月分区(MM)')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY 't'
    NULL DEFINED AS ''
    STORED AS ORC
    TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
    
--日分钟粒度维表
drop table dim.dim_datetime;
CREATE TABLE IF NOT EXISTS dim.dim_datetime(
     `timestr`        STRING COMMENT '当前时间(HH:mm:ss)'
    ,`hourstr`        STRING COMMENT '当前小时(HH)'
    ,`minutestr`      STRING COMMENT '当前分钟(mm)'
    )
COMMENT '日分钟粒度维表'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY 't'
    NULL DEFINED AS ''
    STORED AS ORC
    TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
生成数据
--生成日粒度维表数据
WITH datetime_t AS(
SELECT FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
       (
           SELECt  
               POSEXPLODE(
                   SPLIT(
                       REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
                       ,'o'
                   )
               )
       ) idx_t
),
date_t AS (
select date_add('2000-01-01',idx_t.pos) as datetimestr
from
       (
           select  
               posexplode(
                   split(
                       repeat('o',datediff(from_unixtime(unix_timestamp('2040-12-31 00:00:00'),'yyyy-MM-dd'), '2000-01-01'))
                       ,'o'
                   )
               )
       ) idx_t
),
tmp AS(
select
       datetimestr                                                    AS datetimestr
      ,substr(datetimestr,1,4)                                        AS yearstr
      ,CASE  WHEN MONTH(datetimestr) <= 3  then 1
             WHEN MONTH(datetimestr) <= 6  then 2
             WHEN MONTH(datetimestr) <= 9  then 3
             ELSE 4
       END                                                            AS quarterid
      ,substr(datetimestr,6,2)                                        AS monthstr
      ,WEEKOFYEAR(datetimestr)                                        AS weekofyearid
      ,DATEDIFF(datetimestr,CONCAT(YEAR(datetimestr),'-01-01')) + 1   AS dayofyearid
      ,substr(datetimestr,9,2)                                        AS dayofmonthstr
      ,PMOD(DATEDIFF(datetimestr, '2000-01-01')-1, 7)                 AS dayofweekid
      --,substr(datetimestr,12,2)                                       AS hourstr
      --,substr(datetimestr,15,2)                                       AS minutestr
FROM  date_t
)
INSERT OVERWRITE TABLE dim.dim_date --PARTITION(yearstr,monthstr)
SELECt
       datetimestr
      ,yearstr
      ,quarterid
      ,CASE  WHEN quarterid = 1   then '第一季度'
             WHEN quarterid = 2   then '第二季度'
             WHEN quarterid = 3   then '第三季度'
             WHEN quarterid = 4   then '第四季度'
       END                                                            AS quarter_cn
      ,monthstr
      ,weekofyearid
      ,dayofyearid
      ,dayofmonthstr
      ,dayofweekid
      ,CASE  WHEN dayofweekid = 0  then '星期日'
             WHEN dayofweekid = 1  then '星期一'
             WHEN dayofweekid = 2  then '星期二'
             WHEN dayofweekid = 3  then '星期三'
             WHEN dayofweekid = 4  then '星期四'
             WHEN dayofweekid = 5  then '星期五'
             WHEN dayofweekid = 6  then '星期六'
       END                                                            AS dayofweek_cn
      --,hourstr
      --,minutestr
      ,CASE  WHEN dayofweekid = 0 or dayofweekid = 6 then 0
             ELSE 1
       END                                                            AS workday_flag
      ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                  AS create_time
      --,yearstr
      --,monthstr
FROM  tmp;

--生成分钟粒度维表数据
WITH datetime_t AS(
SELECt FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
       (
           SELECt  
               POSEXPLODE(
                   SPLIT(
                       REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
                       ,'o'
                   )
               )
       ) idx_t
),
tmp AS(
select
       substr(datetimestr,12)                                         AS timestr
      ,substr(datetimestr,12,2)                                       AS hourstr
      ,substr(datetimestr,15,2)                                       AS minutestr
FROM  datetime_t
)
INSERT OVERWRITE TABLE dim.dim_datetime
select 
       timestr
      ,hourstr
      ,minutestr
from  tmp 
变更节假日 变更节假日sql
INSERT INTO TABLE dim.dim_date 
SELECt 
        datetimestr  
       ,yearstr
       ,quarterid           
       ,quarter_cn   
       ,monthstr       
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,0 AS workday_flag --变更为非工作日
       --,1 AS workday_flag --变更为工作日
       ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
FROM   dim.dim_date 
where  yearstr='2000' and monthstr='01' and dayofmonthstr='31';
变更节假日脚本 变更节假日脚本(单个)
#/bin/bash env
#update_workday_flag.sh

USAGE="Usage: 
#/bin/bash env
#update_workday_flag_batch.sh

USAGE="agrs:  [yearstr] [monthstr] [dayofmonthstr] [workday_flag]
       e.g:   2000 01 01 0"

function generate_sql(){

    if [[ $# -lt 4 ]] ; then
        echo "Invalid arguments !!!"
        echo "${USAGE}";
        exit 1;
    fi
    
    SQL_UPDATE="INSERT INTO TABLE cdm.dim_date 
         SELECT 
             datetimestr  
            ,yearstr
            ,quarterid           
            ,quarter_cn   
            ,monthstr       
            ,weekofyearid
            ,dayofyearid 
            ,dayofmonthstr
            ,dayofweekid 
            ,dayofweek_cn
            , AS workday_flag
            ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
        FROM   cdm.dim_date 
        where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    SQL_QUERY="SELECt 
         datetimestr  
        ,yearstr
        ,quarterid           
        ,quarter_cn   
        ,monthstr       
        ,weekofyearid
        ,dayofyearid 
        ,dayofmonthstr
        ,dayofweekid 
        ,dayofweek_cn
        ,workday_flag
        ,create_time
    FROM   cdm.dim_date 
    where  yearstr='' and monthstr='' and dayofmonthstr=''"
}

for line in `cat days_*.csv`
do  
    OLD_IFS="${IFS}"
    IFS=","
    ARR=(${line})
    IFS="{OLD_IFS}"
    generate_sql ${ARR[@]}
    SQL="${SQL};${SQL_UPDATe};${SQL_QUERY}" 
done

beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm  -e "${SQL}" 
[yearstr] [monthstr] [dayofmonthstr] [workday_flag] e.g:
tee days_2022.csv < 2000 01 01 0"

function main(){
    if [[ $# -lt 4 ]] ; then
        echo "${USAGE}";
        exit 1;
    fi
    
    SQL_UPDATE="INSERT INTO TABLE cdm.dim_date 
         SELECT 
             datetimestr  
            ,yearstr
            ,quarterid           
            ,quarter_cn   
            ,monthstr       
            ,weekofyearid
            ,dayofyearid 
            ,dayofmonthstr
            ,dayofweekid 
            ,dayofweek_cn
            , AS workday_flag
            ,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8')                 AS create_time
        FROM   cdm.dim_date 
        where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    SQL_QUERY="SELECt 
         datetimestr  
        ,yearstr
        ,quarterid           
        ,quarter_cn   
        ,monthstr       
        ,weekofyearid
        ,dayofyearid 
        ,dayofmonthstr
        ,dayofweekid 
        ,dayofweek_cn
        ,workday_flag
        ,create_time
    FROM   cdm.dim_date 
    where  yearstr='' and monthstr='' and dayofmonthstr=''"
    
    beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm  -e "${SQL_UPDATe};${SQL_QUERY}" 

}

main "$@"
变更节假日脚本(批量)
--使用维表(日期粒度)
WITH tmp AS(
SELECT 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
)
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1

--使用维表(小时粒度)
WITH tmp AS(
SELECt 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
),
date_t AS(
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1
),
datetime_t AS(
SELECt 
        hourstr
FROM   dim.dim_datetime 
group by hourstr
)
select 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,b.weekofyearid
       ,b.dayofyearid
       ,b.dayofmonthstr
       ,b.dayofweekid
       ,b.dayofweek_cn
       ,a.hourstr
       ,b.workday_flag
from   datetime_t a,date_t b

--使用维表(分钟粒度)
WITH tmp AS(
SELECt 
        datetimestr
       ,yearstr
       ,quarterid   
       ,quarter_cn  
       ,monthstr
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
       ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM   dim.dim_date
--WHERe yearstr='yyyy' and monthstr='MM'
WHERe yearstr='2000' and monthstr='01'
),
date_t AS(
SELECt 
        datetimestr
       ,yearstr      
       ,quarterid   
       ,quarter_cn  
       ,monthstr     
       ,weekofyearid
       ,dayofyearid 
       ,dayofmonthstr
       ,dayofweekid 
       ,dayofweek_cn
       ,workday_flag
FROM   tmp 
WHERe  num=1
)
select 
        concat(b.datetimestr," ",a.timestr)  AS datetimestr
       ,b.yearstr      
       ,b.quarterid   
       ,b.quarter_cn  
       ,b.monthstr     
       ,b.weekofyearid
       ,b.dayofyearid
       ,b.dayofmonthstr
       ,b.dayofweekid
       ,b.dayofweek_cn
       ,a.hourstr
       ,a.minutestr
       ,b.workday_flag
from   dim.dim_datetime a,date_t b

变更日期数据

使用日期维表

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/5688952.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-12-17
下一篇 2022-12-17

发表评论

登录后才能评论

评论列表(0条)

保存