Contents
Experiment Content
Experiment Commands
Experiment Source Code
Experiment Results
Experiment Report
Experiment Content
Become familiar with writing MapReduce programs in Java on Linux: write a WordCount program and find the ten words with the highest frequency.
Experiment Commands
Open a terminal and enter the commands below, following the prompts:
Start Hadoop:
cd /usr/local/hadoop
./sbin/start-dfs.sh
Check whether Hadoop started successfully; if it did, the running daemons will be listed:
jps
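If startup succeeded, the list should include processes similar to NameNode, DataNode, and SecondaryNameNode (the process IDs vary from run to run). If any of them is missing, check the Hadoop logs before continuing.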
View the jar package:
cd /usr/local/hadoop/myapp
ls
Upload the word-frequency file cipin.txt into the input folder; this file holds the text whose word frequencies you want to count:
./bin/hdfs dfs -put /home/hadoop/cipin.txt input
View the HDFS home directory and the contents of the input folder (relative paths such as input resolve under /user/<username> on HDFS):
./bin/hdfs dfs -ls
./bin/hdfs dfs -ls input
Delete the output folder; if it is left over from a previous run, the job will fail with an error:
./bin/hdfs dfs -rm -r output
Run the Experiment 3 program:
./bin/hadoop jar ./myapp/shiyan3.jar input output
Display the results of the run:
./bin/hdfs dfs -cat output/*
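The results are written to files such as output/part-r-00000 (alongside an empty _SUCCESS marker), which the wildcard above prints together; expect up to ten tab-separated word/count lines, one per top word.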
Shut down Hadoop:
./sbin/stop-dfs.sh
PS: You have to write the content of cipin.txt yourself. You can edit it locally first and then upload it to Hadoop.
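As a reference, here is a minimal sketch of preparing a local test file; the sample sentence below is made up, and any passage of space-separated words will do:

echo "hello world hello hadoop hello mapreduce count the words world" > /home/hadoop/cipin.txt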
Experiment Source Code

package sy;

import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class shiyan3 {

    public static class WsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split each input line on spaces and emit (word, 1) for every token
            String[] split = value.toString().split(" ");
            for (String word : split) {
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }

    public static class WsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Accumulates the total count of every word across all reduce() calls
        Map<String, Integer> map = new HashMap<String, Integer>();

        @Override
        public void reduce(Text key, Iterable<IntWritable> iter, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable wordCount : iter) {
                count += wordCount.get();
            }
            String name = key.toString();
            map.put(name, count);
        }

        @Override
        public void cleanup(Context context) throws IOException, InterruptedException {
            // Convert map.entrySet() into a list so it can be sorted
            List<Entry<String, Integer>> list =
                    new LinkedList<Entry<String, Integer>>(map.entrySet());
            // Sort in descending order of count via a comparator
            Collections.sort(list, new Comparator<Entry<String, Integer>>() {
                @Override
                public int compare(Entry<String, Integer> arg0, Entry<String, Integer> arg1) {
                    return arg1.getValue() - arg0.getValue();
                }
            });
            // Emit the top ten words (guarded in case there are fewer than ten distinct words)
            for (int i = 0; i < Math.min(10, list.size()); i++) {
                context.write(new Text(list.get(i).getKey()),
                        new IntWritable(list.get(i).getValue()));
            }
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Got " + otherArgs.length + " arguments. Usage: wordcount <in> <out>");
            System.exit(2); // terminate the currently running JVM
        }
        Job job = Job.getInstance(conf, "CleanUpJob");
        job.setJarByClass(shiyan3.class);
        job.setMapperClass(WsMapper.class);
        job.setReducerClass(WsReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
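For completeness, one plausible way to compile the source and package shiyan3.jar into the myapp folder. This is only a sketch: it assumes the source sits at /home/hadoop/shiyan3.java and that Hadoop is installed under /usr/local/hadoop, so adjust the paths to your environment. The -e flag records sy.shiyan3 as the jar's entry point so the hadoop jar command above can run it without naming the main class:

cd /usr/local/hadoop
mkdir -p /home/hadoop/classes
javac -classpath "$(./bin/hadoop classpath)" -d /home/hadoop/classes /home/hadoop/shiyan3.java
jar -cvfe ./myapp/shiyan3.jar sy.shiyan3 -C /home/hadoop/classes .

Note that the top-ten selection happens in the reducer's cleanup(), which only sees the words routed to that one reducer. The ranking is therefore only globally correct when the job runs with a single reducer, which is Hadoop's default; with several reducers, each would emit its own partial top ten.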