# --- HDFS: stage the raw answer log for processing ---
[root@linux01 exam]# hdfs dfs -mkdir -p /app/data/exam2101
[root@linux01 exam]# hdfs dfs -put ./answer_question.log /app/data/exam2101

// --- spark-shell: parse the log into (topic_id, student_id, question_id, score) ---
// NOTE(review): the blog rendering stripped backslashes. split("\s+") is not a
// valid Scala string escape — the whitespace regex must be "\\s+" — and the
// mkString separator must be "\t" (a real tab) so the Hive table declared with
// `fields terminated by "\t"` can read the output.
scala> sc.textFile("/app/data/exam2101")

// Inspect columns 8 (composite id) and 9 ("score,resp") of each log line.
scala> res1.map(x => x.split("\\s+")).map(x => (x(8), x(9))).collect.foreach(println)

// Break the composite id apart on "_" to see its pieces.
scala> res1.map(x => x.split("\\s+")).map(x => (x(8), x(9))).map(x => x._1.split("_").toList).collect.foreach(println)

// (topic_id, student_id, question_id) — substring drops the trailing ")" on the last part.
scala> res1.map(x => x.split("\\s+")).map(x => (x(8), x(9))).map(x => { var y = x._1.split("_"); (y(1), y(2), y(3).substring(0, y(3).length - 1)) }).collect.foreach(println)

// Add the score: x._2 looks like "score,resp" — keep the part before the comma.
scala> res1.map(x => x.split("\\s+")).map(x => (x(8), x(9))).map(x => { var y = x._1.split("_"); (y(1), y(2), y(3).substring(0, y(3).length - 1), x._2.split(",")(0)) }).collect.foreach(println)

// Persist as tab-separated text for the Hive external table.
// NOTE(review): the original transcript saved to /app/data/result2101 twice with
// two different pipelines; the second save would fail (output path already
// exists), so only the canonical pipeline is kept here.
scala> res1.map(x => x.split("\\s+")).map(x => (x(8), x(9))).map(x => { var y = x._1.split("_"); (y(1), y(2), y(3).substring(0, y(3).length - 1), x._2.split(",")(0)).productIterator.mkString("\t") }).saveAsTextFile("/app/data/result2101")

# --- hbase shell: target table with the two column families used downstream ---
hbase(main):005:0> create 'exam:analysis','accuracy','question'
use exam;

-- Raw per-answer records produced by the Spark job (tab-separated text on HDFS).
-- NOTE(review): the delimiter must be a real tab "\t" — the blog stripped the
-- backslash, leaving the letter "t", which would split rows on every 't'.
create external table if not exists ex_exam_record (
    topic_id    string,
    student_id  string,
    question_id string,
    score       float
)
row format delimited fields terminated by "\t"
stored as textfile
location "/app/data/result2101";

-- Sanity check: row count of the loaded records.
select count(*) from ex_exam_record;

-- Per-student accuracy summary, backed by HBase table exam:analysis,
-- column family `accuracy` (row key = student_id).
-- NOTE(review): the handler class is HBaseStorageHandler (capital B);
-- the original 'HbaseStorageHandler' fails with ClassNotFoundException.
create external table if not exists ex_exam_anlysis (
    student_id     string,
    total_score    float,
    question_count int,
    accuracy       float
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties (
    "hbase.columns.mapping" = ":key,accuracy:total_score,accuracy:question_count,accuracy:accuracy"
)
tblproperties ("hbase.table.name" = "exam:analysis");

-- Per-student question-id lists, column family `question` of the same HBase table.
-- `right` is a reserved word in HiveQL, so it must be backquoted.
create external table if not exists ex_exam_question (
    student_id string,
    `right`    string,
    half       string,
    error      string
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties (
    "hbase.columns.mapping" = ":key,question:right,question:half,question:error"
)
tblproperties ("hbase.table.name" = "exam:analysis");

-- Aggregate each student's total score, answered-question count, and accuracy
-- (average score per question, rounded to 2 decimal places).
insert into ex_exam_anlysis
select
    student_id,
    sum(score)                                           as total_score,
    count(question_id)                                   as question_count,
    cast(sum(score) / count(question_id) as decimal(5,2)) as accuracy
from ex_exam_record
group by student_id;

select * from ex_exam_anlysis;
select * from ex_exam_record;

-- Questions each student answered fully correctly (score = 1).
select
    student_id,
    concat_ws(",", collect_set(question_id)) as `right`
from ex_exam_record
where score = 1
group by student_id;

-- Bucket question ids per student by score: 1 = right, 0.5 = half, 0 = error.
-- CASE without ELSE yields NULL for non-matching rows; collect_set drops NULLs,
-- so each bucket contains only its own question ids.
insert into ex_exam_question
select
    student_id,
    concat_ws(",", collect_set(case when score = 1   then question_id end)) as `right`,
    concat_ws(",", collect_set(case when score = 0.5 then question_id end)) as half,
    concat_ws(",", collect_set(case when score = 0   then question_id end)) as error
from ex_exam_record
group by student_id;

-- Alternate view: question ids grouped by (student, score).
select
    student_id,
    score,
    concat_ws(",", collect_set(question_id)) as questions
from ex_exam_record
group by student_id, score;

select * from ex_exam_question;
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)