提取文件:https://pan.baidu.com/s/1kG7WvALT-ys24GPbDC8XgQ
提取码:dt5a
目录
- 根据部门编号分区,根据 薪水+补贴 降序排列
- 一、MapReduce和Partition部分:
- 二、Employee类:
数据字段依次为:员工编号,名字,职业,上级编号,日期,薪水,补贴,部门编号
代码分为两部分:(更改类名后即可直接使用;请注意查看文中的注释)
一、MapReduce和Partition部分:

package com.Partition;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * MapReduce job: partitions employee records by department number and,
 * within each partition, orders them by (salary + commission) descending
 * (the ordering itself is defined by {@code Employee.compareTo}).
 *
 * Input record layout (comma-separated):
 *   empno,ename,job,mgr,hiredate,sal,comm,deptno
 *   e.g. 7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30
 */
public class EmploPartition {

    /** Map: parse one CSV line into an Employee key; the value is NullWritable. */
    public static class EmpPartionMapper
            extends Mapper<LongWritable, Text, Employee, NullWritable> {

        @Override
        protected void map(LongWritable key1, Text value1, Context context)
                throws IOException, InterruptedException {
            String data = value1.toString();
            // BUG FIX: the record is comma-separated (see sample above), so the
            // delimiter must be "," — the original split("t") split on the
            // letter 't', which mangles every record.
            String[] words = data.split(",");

            // The commission field (index 6) may be empty; treat it as 0.
            double comms;
            if (words[6].isEmpty()) {
                comms = 0.0;
            } else {
                comms = Double.parseDouble(words[6]);
            }

            // Key carries: department number, the whole original line,
            // salary, commission.
            Employee employee = new Employee(
                    Integer.parseInt(words[7]),
                    value1.toString(),
                    Double.parseDouble(words[5]),
                    comms);
            context.write(employee, NullWritable.get());
        }
    }

    /**
     * Partition rule: by department number.
     * dept 10 -> partition 0, dept 20 -> 1, dept 30 -> 2, anything else -> 3.
     * The partition count (4) must match job.setNumReduceTasks(4) in main().
     */
    public static class MyEmployeePartitioner
            extends Partitioner<Employee, NullWritable> {

        @Override
        public int getPartition(Employee key, NullWritable value, int numPartitions) {
            switch (key.getDeptno()) {
                case 10: return 0;
                case 20: return 1;
                case 30: return 2;
                default: return 3;
            }
        }
    }

    /** Reduce: write each employee's original record line (carried in the key) to HDFS. */
    public static class EmpPartionReducer
            extends Reducer<Employee, NullWritable, Text, NullWritable> {

        @Override
        protected void reduce(Employee k3, Iterable<NullWritable> v3, Context context)
                throws IOException, InterruptedException {
            // NOTE: getHiredate() actually holds the whole original input line,
            // not just the hire date — see the Employee class.
            context.write(new Text(k3.getHiredate()), NullWritable.get());
        }
    }

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        // Create the job: job = map + reduce.
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(EmploPartition.class);

        // Mapper and its output key/value types.
        job.setMapperClass(EmpPartionMapper.class);
        job.setMapOutputKeyClass(Employee.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Partitioning rule + number of reduce tasks
        // (must agree with MyEmployeePartitioner, which emits 0..3).
        job.setPartitionerClass(MyEmployeePartitioner.class);
        job.setNumReduceTasks(4);

        // Reducer and its output key/value types.
        job.setReducerClass(EmpPartionReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Input/output HDFS paths; delete the output dir first so reruns succeed.
        String inpath = "./datas/emp.txt";
        String outpath = "./output009";
        FileInputFormat.addInputPath(job, new Path(inpath));
        FileOutputFormat.setOutputPath(job, new Path(outpath));
        FileSystem fs = FileSystem.get(new Configuration());
        if (fs.exists(new Path(outpath))) {
            // 1st arg: path to delete; 2nd arg: true = delete recursively.
            fs.delete(new Path(outpath), true);
        }

        // Submit and wait.
        job.waitForCompletion(true);
    }
}
二、Employee类:
package com.Partition;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

/**
 * MapReduce key type for one employee record:
 *   7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30
 *
 * Sort order: (sal + comm) descending; ties are broken by the raw record
 * text so that distinct employees with equal pay are never treated as the
 * same key (which would group them into a single reduce() call).
 */
public class Employee implements WritableComparable<Employee> {

    // NOTE: despite the name, this field holds the ENTIRE original input
    // line, not just the hire date (see EmpPartionMapper).
    private String hiredate;
    private double sal;    // monthly salary
    private double comm;   // commission/bonus
    private int deptno;    // department number

    /** No-arg constructor required by Hadoop (de)serialization. */
    public Employee() {
    }

    public Employee(int deptno, String hiredate, Double sal, Double comm) {
        this.sal = sal;
        this.comm = comm;
        this.deptno = deptno;
        this.hiredate = hiredate;
    }

    /** Serialization: write the fields in a fixed order. */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeUTF(this.hiredate);
        output.writeDouble(this.sal);
        output.writeDouble(this.comm);
        output.writeInt(this.deptno);
    }

    /** Deserialization: must read fields in the same order write() emits them. */
    @Override
    public void readFields(DataInput input) throws IOException {
        this.hiredate = input.readUTF();
        this.sal = input.readDouble();
        this.comm = input.readDouble();
        this.deptno = input.readInt();
    }

    /**
     * Comparator: descending on total pay (sal + comm).
     *
     * BUG FIX: the original {@code a > b ? -1 : 1} never returned 0 and
     * returned 1 for BOTH a.compareTo(b) and b.compareTo(a) when the totals
     * were equal, violating the compareTo contract (antisymmetry) — sorts may
     * throw "Comparison method violates its general contract!". We now use
     * Double.compare with the operands swapped (descending) and break ties
     * deterministically on the record text, keeping distinct records distinct.
     */
    @Override
    public int compareTo(Employee o) {
        int byPay = Double.compare(o.sal + o.comm, this.sal + this.comm);
        if (byPay != 0) {
            return byPay;
        }
        return this.hiredate.compareTo(o.hiredate);
    }

    public String getHiredate() {
        return hiredate;
    }

    public void setHiredate(String hiredate) {
        this.hiredate = hiredate;
    }

    public double getSal() {
        return sal;
    }

    public void setSal(Double sal) {
        this.sal = sal;
    }

    public double getComm() {
        return comm;
    }

    public void setComm(double comm) {
        this.comm = comm;
    }

    public int getDeptno() {
        return deptno;
    }

    public void setDeptno(int deptno) {
        this.deptno = deptno;
    }

    /** toString returns the raw record line (used by the reducer's output). */
    @Override
    public String toString() {
        return hiredate;
    }
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)