MapReduce WordCount 案例
1 添加依赖
2 编写Mapper类
3 编写Reducer类
4 编写Driver驱动类
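<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com</groupId>
    <artifactId>MapReduceDemo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.30</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

2 mapper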
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; import java.io.IOException; public class WordCountMapper extends Mapper3 reducer{ private Text text = new Text(); private IntWritable outv=new IntWritable(1); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] words = line.split(" "); // 3 循环写入 for (String word: words){ text.set(word); context.write(text,outv); } } }
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class WordCountReducer extends Reducer4 Driver{ private IntWritable outv=new IntWritable(); @Override protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { int sum=0; for (IntWritable value : values) { sum+=value.get(); } outv.set(sum); context.write(key,outv); } }
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import java.io.IOException; public class WordCountDriver { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { // 1 获取job Job job = Job.getInstance(new Configuration()); //2 设置jar 包路径 job.setJarByClass(WordCountDriver.class); //3 设置mapper and areducer job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); //4 set map 输出的kv 类型 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); //5 设置最终输出的kv 类型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // 6 设置输入路径和输出路径 FileInputFormat.setInputPaths(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); // 7 提交job boolean result = job.waitForCompletion(true); System.exit( result ? 0:1); } }
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)