SELECt ROW_NUMBER() OVER(ORDER BY xxxx_code) AS xxxx_key -- 代理键值 ,t.* ,CAST(CURRENT_TIMESTAMP() AS STRING) AS au_last_update_time -- 更新时间 ,'1' AS au_active_flag -- 是否有效 ,'1970-01-01' AS au_start_date -- 有效开始时间 ,'9999-12-31' AS au_end_date -- 有效结束时间 FROM 源表 t;二、增量 1.源表没有lastmodifyrecordtime情况,需要每个字段逐一比对
中间表:
将昨日全量数据插入到中间表
INSERT OVERWIRTE TABLE temp.temp_dim_channel_01
增量变更:
WITH modify_data_temp AS( --查询原表有修改或新增的数据 SELECt t1.* FROM temp.temp_dim_channel_01 t1 LEFT JOIN dim.dim_channel_zi t2 ON MD5(CONCAT_WS(',' ,t1.channel_code ,t1.channel_name ,t1.channel_level_code ,t1.channel_level_name ,t1.channel_code_L1 ,t1.channel_name_L1 ,t1.channel_code_L2 ,t1.channel_name_L2 ,t1.channel_code_L3 ,t1.channel_name_L3 ,t1.channel_code_L4 ,t1.channel_name_L4 ,t1.channel_code_L5 ,t1.channel_name_L5)) = MD5(CONCAT_WS(',' ,t2.channel_code ,t2.channel_name ,t2.channel_level_code ,t2.channel_level_name ,t2.channel_code_L1 ,t2.channel_name_L1 ,t2.channel_code_L2 ,t2.channel_name_L2 ,t2.channel_code_L3 ,t2.channel_name_L3 ,t2.channel_code_L4 ,t2.channel_name_L4 ,t2.channel_code_L5 ,t2.channel_name_L5)) AND t2.au_active_flag='1' WHERe t2.channel_code IS NULL) INSERT OVERWRITE TABLE dim.dim_channel_zi SELECt t1.channel_key -- 代理键值 ,t1.channel_code -- 渠道编码 ,t1.channel_name -- 渠道名称 ,t1.channel_level_code -- 渠道层级代码 ,t1.channel_level_name -- 渠道层级名称 ,t1.channel_code_L1 -- 一级渠道代码 ,t1.channel_name_L1 -- 一级渠道名称 ,t1.channel_code_L2 -- 二级渠道代码 ,t1.channel_name_L2 -- 二级渠道名称 ,t1.channel_code_L3 -- 三级渠道代码 ,t1.channel_name_L3 -- 三级渠道名称 ,t1.channel_code_L4 -- 四级渠道代码 ,t1.channel_name_L4 -- 四级渠道名称 ,t1.channel_code_L5 -- 五级渠道代码 ,t1.channel_name_L5 -- 五级渠道名称 ,t1.bu_flag -- BU标记 ,t1.bu_name -- BU名称 ,t1.remark_channel -- 渠道备注 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效 ,t1.au_start_date -- 有效开始时间 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间 FROM dim.dim_channel_zi t1 LEFT JOIN modify_data_temp t2 ON t1.channel_code=t2.channel_code AND t1.channel_name=t2.channel_name --必须要能唯一匹配到一条数据 UNIOn ALL SELECt ROW_NUMBER() OVER(ORDER BY t1.channel_code)+t2.max_channel_key AS channel_key ,t1.* ,CAST(CURRENT_TIMESTAMP() AS STRING) AS au_last_update_time -- 更新时间 ,'1' AS au_active_flag -- 是否有效 ,CAST(CURRENT_DATE() AS STRING) AS au_start_date -- 有效开始时间 ,'9999-12-31' AS au_end_date -- 有效结束时间 FROM modify_data_temp t1 CROSS JOIN (SELECt COALESCE(MAX(channel_key),0) AS max_channel_key FROM dim.dim_channel_zi) t2;2.源表有lastmodifyrecordtime情况
增量变更:
在这里插入代码片WITH modify_data_temp AS( --源表必须每个字段都要引用到,否则还是得每个字段逐一比对 SELECt * FROM 源表 WHERe lastmodifyrecordtime >= DATE_SUB(CURRENT_DATE(),1)) INSERT OVERWRITE TABLE dim.dim_channel_zi SELECt t1.channel_key -- 代理键值 ,t1.channel_code -- 渠道编码 ,t1.channel_name -- 渠道名称 ,t1.channel_level_code -- 渠道层级代码 ,t1.channel_level_name -- 渠道层级名称 ,t1.channel_code_L1 -- 一级渠道代码 ,t1.channel_name_L1 -- 一级渠道名称 ,t1.channel_code_L2 -- 二级渠道代码 ,t1.channel_name_L2 -- 二级渠道名称 ,t1.channel_code_L3 -- 三级渠道代码 ,t1.channel_name_L3 -- 三级渠道名称 ,t1.channel_code_L4 -- 四级渠道代码 ,t1.channel_name_L4 -- 四级渠道名称 ,t1.channel_code_L5 -- 五级渠道代码 ,t1.channel_name_L5 -- 五级渠道名称 ,t1.bu_flag -- BU标记 ,t1.bu_name -- BU名称 ,t1.remark_channel -- 渠道备注 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效 ,t1.au_start_date -- 有效开始时间 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间 FROM dim.dim_channel_zi t1 LEFT JOIN modify_data_temp t2 ON t1.channel_code=t2.channel_code --必须要能唯一匹配到一条数据,即业务主键要唯一,否则会出现数据发散 UNIOn ALL SELECt ROW_NUMBER() OVER(ORDER BY t1.channel_code)+t2.max_channel_key AS channel_key ,t1.* ,CAST(CURRENT_TIMESTAMP() AS STRING) AS au_last_update_time -- 更新时间 ,'1' AS au_active_flag -- 是否有效 ,CAST(CURRENT_DATE() AS STRING) AS au_start_date -- 有效开始时间 ,'9999-12-31' AS au_end_date -- 有效结束时间 FROM modify_data_temp t1 CROSS JOIN (SELECt COALESCE(MAX(channel_key),0) AS max_channel_key FROM dim.dim_channel_zi) t2;三、增量删除
如果源表会出现删除数据的情况,就需要考虑增量删除
INSERT OVERWRITE TABLE dim.dim_channel_zi SELECt t1.channel_key -- 代理键值 ,t1.channel_code -- 渠道编码 ,t1.channel_name -- 渠道名称 ,t1.channel_level_code -- 渠道层级代码 ,t1.channel_level_name -- 渠道层级名称 ,t1.channel_code_L1 -- 一级渠道代码 ,t1.channel_name_L1 -- 一级渠道名称 ,t1.channel_code_L2 -- 二级渠道代码 ,t1.channel_name_L2 -- 二级渠道名称 ,t1.channel_code_L3 -- 三级渠道代码 ,t1.channel_name_L3 -- 三级渠道名称 ,t1.channel_code_L4 -- 四级渠道代码 ,t1.channel_name_L4 -- 四级渠道名称 ,t1.channel_code_L5 -- 五级渠道代码 ,t1.channel_name_L5 -- 五级渠道名称 ,t1.bu_flag -- BU标记 ,t1.bu_name -- BU名称 ,t1.remark_channel -- 渠道备注 ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间 ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效 ,t1.au_start_date -- 有效开始时间 ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间 FROM dim.dim_channel_zi t1 LEFT JOIN temp.temp_dim_channel_01 t2 ON t1.channel_code=t2.channel_code;小计:
拉链表中对时间的更新逻辑,以下两种写法都可:
1.
--增量变更 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效 ,t1.au_start_date -- 有效开始时间 ,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间 --增量删除 ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间 ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效 ,t1.au_start_date -- 有效开始时间 ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间
--增量变更 ,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_last_update_time,CAST(CURRENT_TIMESTAMP() AS STRING)) AS au_last_update_time -- 更新时间 ,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_active_flag,'0') AS au_active_flag -- 是否有效 ,t1.au_start_date -- 有效开始时间 ,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_end_date,DATE_SUB(CURRENT_DATE(),1)) AS au_end_date -- 有效结束时间 --增量删除 ,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_last_update_time,CAST(CURRENT_TIMESTAMP() AS STRING)) AS au_last_update_time -- 更新时间 ,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_active_flag,'0') AS au_active_flag -- 是否有效 ,t1.au_start_date -- 有效开始时间 ,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_end_date,DATE_SUB(CURRENT_DATE(),1)) AS au_end_date -- 有效结束时间
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)