#!/bin/bash
# Bulk-import daily batches of rows from SQL Server into HBase:
#   sqoop (JDBC -> HDFS TSV) -> ImportTsv (TSV -> HFiles) -> LoadIncrementalHFiles.
# NOTE(review): credentials are hard-coded in the JDBC URL below — move them to a
# protected config file or environment variables; they also leak via `ps`/logs.

# Source database connection (SQL Server JDBC URL).
srcConnect="jdbc:sqlserver://10.2.44.181:1433;username=mc_testor;password=0000002am;database=Gree_MonitorCenter_New"

# Destination table name (informational; the HBase table name is built below).
dstTabname=wp04_0013_data
# Column handed to sqoop --split-by for parallel extraction.
checkColumn="RowKey"
# Column used in the WHERE clause to slice the data one day at a time.
condiColumn="[记录时间]"

# Date range to import; the loop runs one day per iteration until start reaches end.
startRecoID=2017-01-06
plus=1            # currently unused; kept for compatibility
endRecodID=2017-01-11
echo "from ${startRecoID} to ${endRecodID}" > log.out

# HBase table-name prefix (a yyyymmdd suffix is appended per batch).
hbasetablename=wp04_0013_data_
# HDFS directory where the generated HFiles are written.
hbasetablePath=/user/hbase/biz1/wp04_0013_data
# HDFS staging directory for sqoop's intermediate TSV output.
hbasetempPath=/user/hive/biz1/${hbasetablename}temp
# The import loop references ${TemphbasePath}; alias it to the staging dir so the
# variable is actually defined (it was previously unset, expanding to "").
TemphbasePath=${hbasetempPath}

# Extraction query. Builds the HBase row key from ProjDev_ID + time + '7F15' + Reco_ID,
# and a comma-joined payload column c1.
# NOTE(review): CONVERT(varchar(19),121) appears to be missing its value argument —
# presumably CONVERT(varchar(19), a.[记录时间], 121); confirm before relying on c1.
sql="select reverse(right('0000000000'+ltrim(a.[ProjDev_ID]),10))+ replace(CONVERT(varchar(12),a.[记录时间],108),':','')+'7F15'+right('000000'+ltrim([Reco_ID]),6)as RowKey ,CONVERT(varchar(19),121) +','+ cast (b.Proj_ID as varchar ) +','+ cast (a.[ProjDev_ID] as varchar ) +','+ '224' +','+ '' +','+ '' +','+ '7F15' +','+ cast (a.[Reco_ID] as varchar ) +','+'' as c1 from [dbo].[0013_7F15_IDUStateInfo_RSimulateData] a left join Proj_Dev b on a.ProjDev_ID=b.ProjDev_ID "

# Import in daily batches
# Batch-import loop: one day per iteration, from startRecoID up to endRecodID.
while [ "$startRecoID" != "$endRecodID" ]; do
  # Next day's date; clamped to endRecodID so we never run past the range.
  inter=$(date -d "1 day $startRecoID" +%Y-%m-%d)
  temp_endRecodID=$(echo "$endRecodID" | awk '{print $1}')
  end_time_s=$(date -d "$temp_endRecodID" +%s)
  temp_inter=$(echo "$inter" | awk '{print $1}')
  temphbase_inter=$(echo "$inter" | awk '{print $1}' | tr -d "-")
  inter_time_s=$(date -d "$temp_inter" +%s)
  if [ "$inter_time_s" -gt "$end_time_s" ]; then
    inter=$endRecodID
  fi

  # Compact yyyymmdd form of the current day, used as table/path suffix.
  temp_startRecoID=$(echo "$startRecoID" | awk '{print $1}' | tr -d "-")
  starttime="${startRecoID} 00:00:00:000"                    # by ljn add
  endtime="${startRecoID} 23:59:59:997"                      # by ljn add — SQL Server datetime max for the day

  # Remove leftovers from any previous run of this day's batch.
  hadoop fs -rm -r ${hbasetablePath}/${hbasetablename}${temp_startRecoID}
  #hadoop fs -mkdir ${hbasetablePath} 1>/dev/null 2>&1
  hadoop fs -rm -r ${hbasetempPath}${temp_startRecoID} 1>/dev/null 2>&1

  echo "开始导入hbases数据:++++++++++++++++++++++++++++++++"
  # Extract one day of rows as tab-separated text into the staging dir.
  # Fixed: option was misspelled "--fIElds-terminated-by"; staging path used the
  # undefined ${TemphbasePath} (now ${hbasetempPath}).
  sqoop import -D mapreduce.job.queuename=production --connect "${srcConnect}" \
    --query "select t.* from (${sql} where ${condiColumn}>='${starttime}' and ${condiColumn}<='${endtime}' ) t WHERE \$CONDITIONS" \
    --split-by ${checkColumn} --fields-terminated-by "\t" --lines-terminated-by "\n" \
    --target-dir ${hbasetempPath}${temp_startRecoID} \
    -m 15

  # Stop the whole run when sqoop produced no output directory at all.
  hadoop fs -test -e ${hbasetempPath}${temp_startRecoID}
  if [ $? -ne 0 ]; then
    echo "+++++++++++++++++++++++文件不存在+++++++++++++++++++++++++"
    echo "+++++++++++++++++++++现在截断时间是: $startRecoID +++++++++++++++"
    break
  fi

  # Advance to the next day before the (possibly skipped) load phase.
  startRecoID=$inter

  # Skip (and clean up) days whose extract is empty.
  partf1=$(hadoop fs -du -s ${hbasetempPath}${temp_startRecoID} | awk '{print $1 }')
  if [ "$partf1" = "0" ]; then
    hadoop fs -rm -r ${hbasetempPath}${temp_startRecoID} 1>/dev/null 2>&1
    echo "---------------file is null partf1 :$partf1-----------------------"
    continue
  fi

  # Convert the staged TSV into HFiles.
  # Fixed: class name is ImportTsv (Java class names are case-sensitive), and the
  # separator must be a tab to match sqoop's --fields-terminated-by "\t" (it was a space).
  hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
    -Dimporttsv.separator=$'\t' \
    -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' \
    -Dimporttsv.bulk.output=${hbasetablePath}/${hbasetablename}${temp_startRecoID} \
    biz1:${hbasetablename}${temp_startRecoID} ${hbasetempPath}${temp_startRecoID}
  echo "此时的查询时间值为:+++++++++++++++++++++++++++++${temp_startRecoID}"
  echo "此时的hbase存放目录值为:+++++++++++++++++++++++++++++${hbasetablePath}/${hbasetablename}${temp_startRecoID}"
  echo "此时的表名为:+++++++++++++++++++++++++++++${hbasetablename}${temp_startRecoID}"
  echo "表临时文件目录为:+++++++++++++++++++++++++++++${hbasetempPath}${temp_startRecoID}"
  # ps:
  #   f1 is the column family; HBASE_ROW_KEY must be specified in importtsv.columns.
  #   -Dimporttsv.bulk.output is the HFile output dir (this step does NOT create the table).
  #   The staging dir holds the TSV data pulled from SQL Server.
  # Fixed: "hadoop dfs" is deprecated in favor of "hadoop fs".
  hadoop fs -chmod -R 777 ${hbasetablePath}

  # Load the HFiles into the HBase table (this step can create the table).
  # Fixed class-name case: LoadIncrementalHFiles.
  hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles \
    ${hbasetablePath}/${hbasetablename}${temp_startRecoID} biz1:${hbasetablename}${temp_startRecoID}
  hadoop fs -rm -r ${hbasetablePath}/${hbasetablename}${temp_startRecoID}
  hadoop fs -rm -r ${hbasetempPath}${temp_startRecoID} 1>/dev/null 2>&1
  echo "-------------循环完成---------------------------"
done
# Fixed: echo printed a literal "\n"; printf interprets the escape.
printf 'load Data all over\n' >> log.out
以上是内存溢出为你收集整理的「sqoop1:将 SQL Server 数据导入 HBase」的全部内容,希望这篇文章能够帮你解决在使用 sqoop1 将 SQL Server 数据导入 HBase 时遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
评论列表(0条)