use myhive; --1.开启动态分区 set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nonstrict;--分区模式非严格模式 --2.创建中间表 create table test1( id int, data_val string , name string, score int )row format delimited fields terminated by ','; --3.加载数据给中间表 load data local inpath ''into table test1; --4.创建分区表 create table test2( id int, name string, score int )partitioned by (data_val string) row format delimited fields terminated by ','; --5.给分区表添加数据 insert overwrite table test2 partition (data_val)select id,name ,score ,data_val from test1;
开启hive的本地模式
set hive.exec.mode.local.auto=true;二.join
- inner join:
select * from teach,course where teach.tid=course.tid;
- left join
select * from teach left join course c on teach.tid = c.tid;
- right join
select * from teach right join course c on teach.tid = c.tid;
5.满外连接
select * from teach full join course c on teach.tid = c.tid;三.排序
- order by:全局,一个reduce
select * from student left join score2 s on student.sid = s.sid order by s.score desc;
- sort by:每个reduce内部局部排序,设置reduce个数
set mapreduce.job.reduces=3; --查看reduces个数 set mapreduce.job.reduces; --查询成绩按照降序排序 select * from score2 sort by score;
导出查询结果文件
insert overwrite local directory '/export/data/hivedata/order' select * from score2 sort by score;
distribute by负责划分,sort by负责排序
select * from score2 distribute by sid sort by score;
- cluster by:按照sid划分,按照sid排序
select * from score2 cluster by sid;四. hive的内置函数
- 数学函数:
1.1取整:select round(3.1415926);
1.2指定精度取整函数:select round(3.1415926,4)
select cid , round(avg(score)) from score2 group by cid;
1.3 向下取整:select floor(3.155);
1.4向上取整:select ceil(3.14);
1.5随机函数:select rand();
1.6幂运算:select pow(2,4)2的4次方
1.7绝对值:select abs(4);
2.字符串函数
2.1 字符串长度:select length(‘ddddddddd’);
2.2字符串反转:select reverse(‘ssffjs’);
2.3字符串连接: select concat(‘2021-12-21’,‘10:38:11’);
2.4指定拼接分隔符:select concat_ws(‘ ’,‘2021-12-21’,‘10:38:11’);
2.5字符串截取:select substr(‘abcdse’,3);
2.6大写lower(小写)ucase()
2.7去空格:trim()
2.8解析url :select parse_url(‘http://facebook.com/path1/p.php?k1=v1&k2=v2’,‘HOST’);
select parse_url(‘http://facebook.com/path1/p.php?k1=v1&k2=v2’,‘PATH’);
select parse_url(‘http://facebook.com/path1/p.php?k1=v1&k2=v2’,‘QUERY’);
2.9分割字符转:select split(‘abcdfrhdhdhhdhdlkh’,‘h’);
- 日期函数
3.1获取unix时间戳函数:select unix_timestamp();
3.2 转换成指定格式时间:select from_unixtime(156325255,‘yyyy-MM-dd HH:mm:ss’);
3.3时间转换成标准的:select date_format(‘2020-1-1 1:1:1’,‘yyyy-MM-dd HH:mm:ss’)
3.4 获取日期:select to_date(‘2021-10-25 10:3:56’)
3.5获取年:select year(‘2012-12-08’)
3.6获取周:select weekofyear();
3.7日期的比较:select datediff(‘2021-12-08’,‘2021-10-09’);
3.8日期的增加:date_add(‘2021-12-08’,10) 日期减少:date_sub(‘2021-12-08’,10)条件函数:
5.1if
select *,if( score<60,'不及格','及格') from score2;
5.2
select case 100 when 50 then 'tom' when 100 then 'jjdj' else 'ty' end;
5.3
select case when 1=2 then 'tom' when 2=2 then 'mary' else 'tin' end;
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)