-- 创建一个测试库 CREATE DATAbase dw; -- 创建行转列测试数据表 CREATE TABLE IF NOT EXISTS dw.person ( name string, gender string, age int, hobby string ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE ; -- 数据内容 jack,male,20,music marry,female,18,dance carry,male,22,basketball tom,male,24,movie jack,male,20,read marry,female,18,run tom,male,24,movie LOAD DATA LOCAL INPATH '/opt/data/person.csv' INTO TABLE dw.person; -- 创建列转行测试数据表 CREATE TABLE IF NOT EXISTS dw.user_tag ( name string, tags string ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE ; -- 数据内容 tom,T0001/T0002/T0003 jack,T0002/T0001 marry,T0003/T0004 mac,T0002/T0005 LOAD DATA LOCAL INPATH '/opt/data/tag.csv' INTO TABLE dw.user_tag;行转列
使用函数:concat_ws(",", collect_set(col)) 注意col字段必须为string类型
collect_set可以置换成collect_list,但是两者有点细微的区别
- collect_set:会对多行转列的字段去重
- collect_list:不会对多行转列的字段去重
-- 行转列:把同一个人的爱好使用,分隔组合,并进行去重 SELECt name, gender, age, concat_ws(",", collect_set(hobby)) AS hobbys FROM dw.person GROUP BY name, gender, age ; +-----+------+---+----------+ |name |gender|age|hobbys | +-----+------+---+----------+ |tom |male |24 |movie | |carry|male |22 |basketball| |jack |male |20 |music,read| |marry|female|18 |run,dance | +-----+------+---+----------+ -- 行转列:把同一个人的爱好使用,分隔组合,不进行去重 SELECt name, gender, age, concat_ws(",", collect_list(hobby)) AS hobbys FROM dw.person GROUP BY name, gender, age ; +-----+------+---+-----------+ |name |gender|age|hobbys | +-----+------+---+-----------+ |tom |male |24 |movie,movie| |carry|male |22 |basketball | |jack |male |20 |music,read | |marry|female|18 |dance,run | +-----+------+---+-----------+列转行
使用函数:LATERAL VIEW explode(split(col, “/”)) tbl AS tag
参数解析
- col:需要拆分的列字段
- /:col字段分隔符
- tbl:列转行后的表别名
- tag:拆分后的字段名
SELECt name, tag FROM dw.user_tag LATERAL VIEW explode(split(tags, "/")) tbl AS tag ; +-----+-----+ |name |tag | +-----+-----+ |tom |T0001| |tom |T0002| |tom |T0003| |jack |T0002| |jack |T0001| |marry|T0003| |marry|T0004| |mac |T0002| |mac |T0005| +-----+-----+
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)