package com.a.b.partion_demo;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class MyPartioner extends Partitioner<Text, NullWritable> {
    @Override
    public int getPartition(Text text, NullWritable nullWritable, int numPartitions) {
        // Input lines are tab-separated; partition on the sixth field (index 5)
        String[] strings = text.toString().split("\t");
        System.out.println(strings[5]); // debug print of the partitioning field
        if (Integer.parseInt(strings[5]) > 15) {
            return 1; // records with a value greater than 15 go to partition 1
        } else {
            return 0; // everything else goes to partition 0
        }
    }
}
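getPartition must return a value in the range [0, numReduceTasks), so this partitioner only makes sense with two reduce tasks, as configured in the driver below. As a purely hypothetical illustration of the expected input (field names are placeholders, not from the original post), a tab-separated line such as

    f1	f2	f3	f4	f5	20

would land in partition 1, because its sixth field parses to 20, which is greater than 15.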
package com.a.b.partion_demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class PartionMain extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(new Configuration(), "partion_demo");
        job.setJarByClass(PartionMain.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path("hdfs://node01:8020/partition"));
        TextOutputFormat.setOutputPath(job, new Path("hdfs://node01:8020/partition_out"));

        job.setMapperClass(PartionMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setReducerClass(PartionReduce.class);

        // Custom partitioner, and a matching number of reduce tasks (one per partition)
        job.setPartitionerClass(MyPartioner.class);
        job.setNumReduceTasks(2);

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int i = ToolRunner.run(new Configuration(), new PartionMain(), args);
        System.exit(i);
    }
}
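The driver references PartionMapper and PartionReduce, which are not shown in the post. Below is a minimal sketch of what they might look like, assuming the mapper emits each whole input line as the key (so MyPartioner can split it and read field 5) and the reducer passes every record through unchanged; each class would live in its own file under the same package:

// PartionMapper.java
package com.a.b.partion_demo;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class PartionMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit the whole line as the key; the partitioner decides where it goes
        context.write(value, NullWritable.get());
    }
}

// PartionReduce.java
package com.a.b.partion_demo;

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class PartionReduce extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Write the key once per occurrence so duplicate lines are preserved
        for (NullWritable ignored : values) {
            context.write(key, NullWritable.get());
        }
    }
}

With numReduceTasks set to 2, the job writes two files under /partition_out: part-r-00000 holds the records whose sixth field is 15 or less, and part-r-00001 holds those above 15.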