创建hive拉链表

创建hive拉链表,第1张

创建hive拉链表 一、初始化
SELECt
 ROW_NUMBER() OVER(ORDER BY xxxx_code)    AS xxxx_key              -- 代理键值
,t.*
,CAST(CURRENT_TIMESTAMP() AS STRING)      AS au_last_update_time   -- 更新时间
,'1'                                      AS au_active_flag        -- 是否有效
,'1970-01-01'                             AS au_start_date         -- 有效开始时间
,'9999-12-31'                             AS au_end_date           -- 有效结束时间
FROM 源表 t;
二、增量 1.源表没有lastmodifyrecordtime情况,需要每个字段逐一比对

中间表:
将昨日全量数据插入到中间表

INSERT OVERWIRTE TABLE temp.temp_dim_channel_01

增量变更:

WITH modify_data_temp AS(             --查询原表有修改或新增的数据
SELECt t1.*
FROM temp.temp_dim_channel_01 t1
LEFT JOIN dim.dim_channel_zi t2
ON  
MD5(CONCAT_WS(','
,t1.channel_code      
,t1.channel_name      
,t1.channel_level_code
,t1.channel_level_name
,t1.channel_code_L1   
,t1.channel_name_L1   
,t1.channel_code_L2   
,t1.channel_name_L2   
,t1.channel_code_L3   
,t1.channel_name_L3   
,t1.channel_code_L4   
,t1.channel_name_L4   
,t1.channel_code_L5   
,t1.channel_name_L5)) =
MD5(CONCAT_WS(','
,t2.channel_code      
,t2.channel_name      
,t2.channel_level_code
,t2.channel_level_name
,t2.channel_code_L1   
,t2.channel_name_L1   
,t2.channel_code_L2   
,t2.channel_name_L2   
,t2.channel_code_L3   
,t2.channel_name_L3   
,t2.channel_code_L4   
,t2.channel_name_L4   
,t2.channel_code_L5   
,t2.channel_name_L5))
AND t2.au_active_flag='1' 
WHERe t2.channel_code IS NULL)
INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECt 
 t1.channel_key           -- 代理键值
,t1.channel_code          -- 渠道编码
,t1.channel_name          -- 渠道名称
,t1.channel_level_code    -- 渠道层级代码
,t1.channel_level_name    -- 渠道层级名称
,t1.channel_code_L1       -- 一级渠道代码
,t1.channel_name_L1       -- 一级渠道名称
,t1.channel_code_L2       -- 二级渠道代码
,t1.channel_name_L2       -- 二级渠道名称
,t1.channel_code_L3       -- 三级渠道代码
,t1.channel_name_L3       -- 三级渠道名称
,t1.channel_code_L4       -- 四级渠道代码
,t1.channel_name_L4       -- 四级渠道名称
,t1.channel_code_L5       -- 五级渠道代码
,t1.channel_name_L5       -- 五级渠道名称
,t1.bu_flag               -- BU标记
,t1.bu_name               -- BU名称
,t1.remark_channel        -- 渠道备注
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间
FROM dim.dim_channel_zi t1 
LEFT JOIN modify_data_temp t2
ON  t1.channel_code=t2.channel_code
AND t1.channel_name=t2.channel_name       --必须要能唯一匹配到一条数据
UNIOn ALL
SELECt
 ROW_NUMBER() OVER(ORDER BY t1.channel_code)+t2.max_channel_key AS channel_key
,t1.*
,CAST(CURRENT_TIMESTAMP() AS STRING)      AS au_last_update_time   -- 更新时间
,'1'                                      AS au_active_flag        -- 是否有效
,CAST(CURRENT_DATE() AS STRING)           AS au_start_date         -- 有效开始时间
,'9999-12-31'                             AS au_end_date           -- 有效结束时间
FROM modify_data_temp t1
CROSS JOIN (SELECt COALESCE(MAX(channel_key),0) AS max_channel_key FROM dim.dim_channel_zi) t2;
2.源表有lastmodifyrecordtime情况

增量变更:

在这里插入代码片WITH modify_data_temp AS(   --源表必须每个字段都要引用到,否则还是得每个字段逐一比对
SELECt *
FROM 源表
WHERe lastmodifyrecordtime >= DATE_SUB(CURRENT_DATE(),1))
INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECt 
 t1.channel_key           -- 代理键值
,t1.channel_code          -- 渠道编码
,t1.channel_name          -- 渠道名称
,t1.channel_level_code    -- 渠道层级代码
,t1.channel_level_name    -- 渠道层级名称
,t1.channel_code_L1       -- 一级渠道代码
,t1.channel_name_L1       -- 一级渠道名称
,t1.channel_code_L2       -- 二级渠道代码
,t1.channel_name_L2       -- 二级渠道名称
,t1.channel_code_L3       -- 三级渠道代码
,t1.channel_name_L3       -- 三级渠道名称
,t1.channel_code_L4       -- 四级渠道代码
,t1.channel_name_L4       -- 四级渠道名称
,t1.channel_code_L5       -- 五级渠道代码
,t1.channel_name_L5       -- 五级渠道名称
,t1.bu_flag               -- BU标记
,t1.bu_name               -- BU名称
,t1.remark_channel        -- 渠道备注
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间
FROM dim.dim_channel_zi t1 
LEFT JOIN modify_data_temp t2
ON  t1.channel_code=t2.channel_code    --必须要能唯一匹配到一条数据,即业务主键要唯一,否则会出现数据发散
UNIOn ALL
SELECt
 ROW_NUMBER() OVER(ORDER BY t1.channel_code)+t2.max_channel_key AS channel_key
,t1.*
,CAST(CURRENT_TIMESTAMP() AS STRING)      AS au_last_update_time   -- 更新时间
,'1'                                      AS au_active_flag        -- 是否有效
,CAST(CURRENT_DATE() AS STRING)           AS au_start_date         -- 有效开始时间
,'9999-12-31'                             AS au_end_date           -- 有效结束时间
FROM modify_data_temp t1
CROSS JOIN (SELECt COALESCE(MAX(channel_key),0) AS max_channel_key FROM dim.dim_channel_zi) t2;
三、增量删除

如果源表会出现删除数据的情况,就需要考虑增量删除

INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECt
     t1.channel_key           -- 代理键值
    ,t1.channel_code          -- 渠道编码
    ,t1.channel_name          -- 渠道名称
    ,t1.channel_level_code    -- 渠道层级代码
    ,t1.channel_level_name    -- 渠道层级名称
    ,t1.channel_code_L1       -- 一级渠道代码
    ,t1.channel_name_L1       -- 一级渠道名称
    ,t1.channel_code_L2       -- 二级渠道代码
    ,t1.channel_name_L2       -- 二级渠道名称
    ,t1.channel_code_L3       -- 三级渠道代码
    ,t1.channel_name_L3       -- 三级渠道名称
    ,t1.channel_code_L4       -- 四级渠道代码
    ,t1.channel_name_L4       -- 四级渠道名称
    ,t1.channel_code_L5       -- 五级渠道代码
    ,t1.channel_name_L5       -- 五级渠道名称
    ,t1.bu_flag               -- BU标记
    ,t1.bu_name               -- BU名称
    ,t1.remark_channel        -- 渠道备注
    ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
    ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
    ,t1.au_start_date         -- 有效开始时间
    ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间
FROM dim.dim_channel_zi t1 
LEFT JOIN temp.temp_dim_channel_01 t2   
ON t1.channel_code=t2.channel_code;
小计:

拉链表中对时间的更新逻辑,以下两种写法都可:
1.

--增量变更
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间

 --增量删除
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间
    --增量变更
    ,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_last_update_time,CAST(CURRENT_TIMESTAMP() AS STRING)) AS au_last_update_time  -- 更新时间
    ,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_active_flag,'0') AS au_active_flag       -- 是否有效 
    ,t1.au_start_date         -- 有效开始时间
    ,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_end_date,DATE_SUB(CURRENT_DATE(),1)) AS au_end_date  -- 有效结束时间
    
    --增量删除
    ,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_last_update_time,CAST(CURRENT_TIMESTAMP() AS STRING)) AS au_last_update_time  -- 更新时间
    ,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_active_flag,'0') AS au_active_flag       -- 是否有效 
    ,t1.au_start_date         -- 有效开始时间
    ,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_end_date,DATE_SUB(CURRENT_DATE(),1)) AS au_end_date  -- 有效结束时间
    

    欢迎分享,转载请注明来源:内存溢出

    原文地址: https://outofmemory.cn/zaji/5717163.html

    (0)
    打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
    上一篇 2022-12-17
    下一篇 2022-12-18

    发表评论

    登录后才能评论

    评论列表(0条)

    保存