1. Environment Preparation
1. Eclipse version: Eclipse Juno Service Release (4.2)
2. Operating system: Windows 7
3. Hadoop Eclipse plugin: hadoop-eclipse-plugin-2.2.0.jar
4. Hadoop cluster: single-node pseudo-distributed setup on CentOS 6.5 in a Linux virtual machine
5. Test program: Hello World (the WordCount example below)
2. Notes:
The first exception:
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
Fix:
Hard-code the return value of the checkHadoopHome() method in the org.apache.hadoop.util.Shell class:
private static String checkHadoopHome() {
  // first check the Dflag hadoop.home.dir with JVM scope
  //System.setProperty("hadoop.home.dir", "...");
  String home = System.getProperty("hadoop.home.dir");
  // fall back to the system/user-global env variable
  if (home == null) {
    home = System.getenv("HADOOP_HOME");
  }
  try {
    // couldn't find either setting for hadoop's home directory
    if (home == null) {
      throw new IOException("HADOOP_HOME or hadoop.home.dir are not set.");
    }
    if (home.startsWith("\"") && home.endsWith("\"")) {
      home = home.substring(1, home.length()-1);
    }
    // check that the home setting is actually a directory that exists
    File homedir = new File(home);
    if (!homedir.isAbsolute() || !homedir.exists() || !homedir.isDirectory()) {
      throw new IOException("Hadoop home directory " + homedir
          + " does not exist, is not a directory, or is not an absolute path.");
    }
    home = homedir.getCanonicalPath();
  } catch (IOException ioe) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Failed to detect a valid hadoop home directory", ioe);
    }
    home = null;
  }
  // hard-code the local Hadoop installation path
  home = "D:\\hadoop-2.2.0";
  return home;
}
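For this patch to take effect without rebuilding Hadoop, one common approach is to copy the modified Shell.java into the Eclipse project's src directory under the same package, org.apache.hadoop.util, so that the patched class takes precedence over the one inside the Hadoop jar. Whether it actually wins depends on your project's classpath order, so treat this as an assumption to verify in your own setup.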
The second exception: Could not locate executable D:\Hadoop\tar\hadoop-2.2.0\hadoop-2.2.0\bin\winutils.exe in the Hadoop binaries. The Windows helper executables cannot be found. Download the bin package from https://github.com/srccodes/hadoop-common-2.2.0-bin and use it to overwrite the bin directory under your local Hadoop root.
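If you would rather not patch the Hadoop source at all, a minimal sketch of an alternative is shown below. It relies on the fact that checkHadoopHome() reads the hadoop.home.dir JVM property before falling back to the HADOOP_HOME environment variable; the D:\hadoop-2.2.0 path and the HadoopHomeFix class name are assumptions for illustration.
// Minimal sketch: set the JVM-scoped property that checkHadoopHome() checks
// first. This must run before any Hadoop class is initialized.
// The path below is an assumption; use the directory where you unpacked
// hadoop-2.2.0 together with the downloaded winutils bin package.
public class HadoopHomeFix {
    public static void main(String[] args) throws Exception {
        System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
        // ... launch the MapReduce driver from here ...
    }
}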
The third exception:
Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.130.54:19000/user/hmail/output/part-00000, expected: file:///
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357)
at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125)
at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356)
at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23)
This exception usually means the HDFS path is configured incorrectly: the client is falling back to the local file system (file:///). Fix: copy core-site.xml and hdfs-site.xml from the cluster and place them in the src root directory of the Eclipse project.
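Alternatively, if you prefer not to copy the cluster configuration files into the project, a minimal sketch is to set fs.defaultFS on the Configuration by hand. The NameNode address below is taken from the stack trace above, and the ExplicitHdfsConfig class name is an assumption; adjust both to your cluster.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Minimal sketch: point the client at HDFS instead of the default file:///.
public class ExplicitHdfsConfig {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.130.54:19000");
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.exists(new Path("/user/hmail/output/part-00000")));
    }
}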
package com.qin.wordcount;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/***
 *
 * Hadoop 2.2.0 test:
 * the WordCount example
 *
 * @author qindongliang
 *
 * Hadoop discussion QQ group: 376932160
 *
 *
 * */
public class MyWordCount {
/**
 * Mapper
 *
 * **/
private static class WMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    private IntWritable count=new IntWritable(1);
    private Text text=new Text();
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // each input line has the form "word#count"
        String values[]=value.toString().split("#");
        //System.out.println(values[0]+"========"+values[1]);
        count.set(Integer.parseInt(values[1]));
        text.set(values[0]);
        context.write(text, count);
    }
}
/**
 * Reducer
 *
 * **/
private static class WReducer extends Reducer<Text, IntWritable, Text, Text>{
    private Text t=new Text();
    @Override
    protected void reduce(Text key, Iterable<IntWritable> value, Context context)
            throws IOException, InterruptedException {
        // sum all counts seen for this word
        int count=0;
        for(IntWritable i:value){
            count+=i.get();
        }
        t.set(count+"");
        context.write(key, t);
    }
}
/**
 * Change 1:
 * (1) hard-code the Hadoop home path in checkHadoopHome() in the Shell source
 * (2) FileUtils, around line 974
 * **/
public static void main(String[] args) throws Exception{
    // String path1=System.getenv("HADOOP_HOME");
    // System.out.println(path1);
    // System.exit(0);
    JobConf conf=new JobConf(MyWordCount.class);
    //Configuration conf=new Configuration();
    //conf.set("mapred.job.tracker","192.168.75.130:9001");
    // conf.setJar("tt.jar");
    // Note: the conf setup above must come first so it is initialized
    // before the Job is created; otherwise an error is thrown.
    /** Job setup **/
    Job job=new Job(conf, "testwordcount");
    job.setJarByClass(MyWordCount.class);
    System.out.println("Mode: "+conf.get("mapred.job.tracker"));
    // job.setCombinerClass(PCombine.class);
    // job.setNumReduceTasks(3); // use 3 reduce tasks
    job.setMapperClass(WMapper.class);
    job.setReducerClass(WReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    String path="hdfs://192.168.46.28:9000/qin/output";
    FileSystem fs=FileSystem.get(conf);
    Path p=new Path(path);
    if(fs.exists(p)){
        fs.delete(p, true);
        System.out.println("Output path already exists; deleted!");
    }
    FileInputFormat.setInputPaths(job, "hdfs://192.168.46.28:9000/qin/input");
    FileOutputFormat.setOutputPath(job, p);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
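For reference, the mapper expects each input line in word#count form, so a toy input file (contents assumed purely for illustration) and the corresponding job output would look like this:

Sample input (one word#count pair per line):
hadoop#1
hdfs#2
hadoop#3

Output produced by the reducer (TextOutputFormat separates key and value with a tab):
hadoop	4
hdfs	2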
Replying to you here directly: with two machines available, choosing Cygwin is a puzzling decision. I suggest switching to two virtual machines, installing Linux on them, and then installing Hadoop; that way you should not run into these problems. Cygwin really is not suited to running Hadoop: it produces a pile of problems, and all of them are painful to resolve.