1. After downloading Hadoop, extract it to the target directory:
For easier management, rename the extracted directory to hadoop with mv.
[root@master ~]# tar -zxvf hadoop-2.7.1.tar.gz -C /usr/local/src/
[root@master ~]# cd /usr/local/src/
[root@master src]# ls
hadoop-2.7.1  java  zookeeper
[root@master src]# mv hadoop-2.7.1/ hadoop
[root@master src]# ls
hadoop  java  zookeeper
2. Configure the Hadoop environment variables
[root@master ~]# vi /etc/profile
#hadoop
export HADOOP_HOME=/usr/local/src/hadoop
export HADOOP_PREFIX=$HADOOP_HOME
export HADOOP_INSTALL=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
// Apply the environment variables
[root@master ~]# source /etc/profile
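A quick sanity check (a minimal sketch, assuming only that /etc/profile has been sourced in the current shell) is to confirm that the variables resolve and that the Hadoop scripts are now on the PATH:

[root@master ~]# echo $HADOOP_HOME
/usr/local/src/hadoop
[root@master ~]# which hadoop
/usr/local/src/hadoop/bin/hadoop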
3. Configure hadoop-env.sh
Change into hadoop/etc/hadoop:
[root@master ~]# cd /usr/local/src/
[root@master src]# cd hadoop/etc/hadoop/
[root@master hadoop]# ls
capacity-scheduler.xml      hadoop-env.sh               httpfs-env.sh            kms-env.sh            mapred-env.sh               ssl-server.xml.example
configuration.xsl           hadoop-metrics2.properties  httpfs-log4j.properties  kms-log4j.properties  mapred-queues.xml.template  yarn-env.cmd
container-executor.cfg      hadoop-metrics.properties   httpfs-signature.secret  kms-site.xml          mapred-site.xml.template    yarn-env.sh
core-site.xml               hadoop-policy.xml           httpfs-site.xml          log4j.properties      slaves                      yarn-site.xml
hadoop-env.cmd              hdfs-site.xml               kms-acls.xml             mapred-env.cmd        ssl-client.xml.example
[root@master hadoop]# vi hadoop-env.sh
// Change the Java path to your own absolute path
# The java implementation to use.
export JAVA_HOME=/usr/local/src/java
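To make sure the JAVA_HOME you just set really points at a usable JDK (this assumes Java was installed to /usr/local/src/java, as in the earlier chapters), run the bundled java binary directly and re-read the line from the file:

[root@master hadoop]# /usr/local/src/java/bin/java -version
[root@master hadoop]# grep "^export JAVA_HOME" hadoop-env.sh
export JAVA_HOME=/usr/local/src/java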
4. Create the directories that will hold the namenode, datanode, journalnode, and log data
[root@master hadoop]# pwd
/usr/local/src/hadoop
[root@master hadoop]# mkdir -p tmp/hdfs/nn
[root@master hadoop]# mkdir -p tmp/hdfs/dn
[root@master hadoop]# mkdir -p tmp/hdfs/jn
[root@master hadoop]# mkdir -p tmp/logs
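If you want to confirm the directory tree was created as expected, find can list the new directories (a quick check; the order of the entries may vary):

[root@master hadoop]# find tmp -type d
tmp
tmp/hdfs
tmp/hdfs/nn
tmp/hdfs/dn
tmp/hdfs/jn
tmp/logs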
5. Configure core-site.xml
core-site.xml holds Hadoop's core configuration, such as the I/O settings shared by HDFS, MapReduce, and YARN.
[root@master hadoop]# pwd
/usr/local/src/hadoop/etc/hadoop
[root@master hadoop]# vi core-site.xml
// The configuration in core-site.xml is as follows:
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://mycluster</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/src/hadoop/tmp</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>
    <property>
        <name>ha.zookeeper.session-timeout.ms</name>
        <value>30000</value>
    </property>
    <property>
        <name>fs.trash.interval</name>
        <value>1440</value>
    </property>
</configuration>
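Since the environment variables from step 2 are already in effect, you can let Hadoop read this file back and echo individual keys; this is a purely local configuration check and does not require any daemons to be running:

[root@master hadoop]# hdfs getconf -confKey fs.defaultFS
hdfs://mycluster
[root@master hadoop]# hdfs getconf -confKey ha.zookeeper.quorum
master:2181,slave1:2181,slave2:2181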
6. Configure hdfs-site.xml
hdfs-site.xml configures the HDFS daemons: the namenode, the secondary namenode (SecondaryNameNode), and the datanodes.
[root@master hadoop]# vi hdfs-site.xml
<configuration>
    <property>
        <name>dfs.qjournal.start-segment.timeout.ms</name>
        <value>60000</value>
    </property>
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>master,slave1</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.master</name>
        <value>master:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.slave1</name>
        <value>slave1:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.master</name>
        <value>master:50070</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.slave1</name>
        <value>slave1:50070</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.support.append</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/usr/local/src/hadoop/tmp/hdfs/nn</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/usr/local/src/hadoop/tmp/hdfs/dn</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/usr/local/src/hadoop/tmp/hdfs/jn</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    <property>
        <name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
        <value>60000</value>
    </property>
</configuration>
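As a quick consistency check on the HA settings (again purely local, nothing needs to be running), hdfs getconf can list the namenodes it derives from this file and echo the shared edits directory:

[root@master hadoop]# hdfs getconf -namenodes
master slave1
[root@master hadoop]# hdfs getconf -confKey dfs.namenode.shared.edits.dir
qjournal://master:8485;slave1:8485;slave2:8485/mycluster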
7. Configure mapred-site.xml (settings for the MapReduce daemons, including the job history server)
[root@master hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@master hadoop]# vi mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
</configuration>
8. Configure yarn-site.xml (settings for the YARN daemons: the resource managers, the web application proxy server, and the node managers)
[root@master hadoop]# vi yarn-site.xml
<configuration>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yrc</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>slave1</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>86400</value>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
</configuration>
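A malformed XML file is one of the most common reasons the daemons later refuse to start, so it is worth confirming that all four edited files are well-formed. One way, assuming the xmllint tool is available (it is not part of Hadoop; on CentOS it comes from the libxml2 package), is:

[root@master hadoop]# xmllint --noout core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml

No output means all four files parse cleanly; any syntax error is reported with its file name and line number.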
9. Configure the slaves file (it controls which machines act as worker nodes, i.e. which hosts run the datanode and nodemanager daemons)
[root@master hadoop]# vi slaves
master
slave1
slave2
10. Distribute the files to the slave nodes
(1) Distribute the Hadoop directory
// Copy to slave1
[root@master ~]# scp -r /usr/local/src/hadoop/ root@slave1:/usr/local/src/
// Copy to slave2
[root@master ~]# scp -r /usr/local/src/hadoop/ root@slave2:/usr/local/src/
(2) Distribute the environment variables
// Copy to slave1
[root@master ~]# scp -r /etc/profile root@slave1:/etc/
// Copy to slave2
[root@master ~]# scp -r /etc/profile root@slave2:/etc/
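To confirm both copies landed, you can list the target directory on each slave over SSH (this assumes the passwordless SSH set up in the earlier chapters still works); the listing should now include the hadoop directory:

[root@master ~]# ssh slave1 "ls /usr/local/src/"
[root@master ~]# ssh slave2 "ls /usr/local/src/"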
11. Change the owner and group
[root@master ~]# chown -R hadoop:hadoop /usr/local/src/hadoop/
[root@slave1 ~]# chown -R hadoop:hadoop /usr/local/src/hadoop/
[root@slave2 ~]# chown -R hadoop:hadoop /usr/local/src/hadoop/
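You can verify the change on each node with ls (this assumes the hadoop user and group were created in the earlier chapters); both entries should show hadoop in the owner and group columns:

[root@master ~]# ls -ld /usr/local/src/hadoop/ /usr/local/src/hadoop/tmp/hdfs/nn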
12. Apply the environment variables
[root@master ~]# su hadoop
[hadoop@master root]$ cd
[hadoop@master ~]$ source /etc/profile
[root@slave1 ~]# su hadoop
[hadoop@slave1 root]$ cd
[hadoop@slave1 ~]$ source /etc/profile
[root@slave2 ~]# su hadoop
[hadoop@slave2 root]$ cd
[hadoop@slave2 ~]$ source /etc/profile
OK, that completes the configuration of the Hadoop HA (high-availability) cluster.
[hadoop@master ~]$ hadoop version
Hadoop 2.7.1
Subversion https://git-wip-us.apache.org/repos/asf/hadoop.git -r 15ecc87ccf4a0228f35af08fc56de536e6ce657a
Compiled by jenkins on 2015-06-29T06:04Z
Compiled with protoc 2.5.0
From source with checksum fc0a1a23fc1868e4d5ee7fa2b28a58a
This command was run using /usr/local/src/hadoop/share/hadoop/common/hadoop-common-2.7.1.jar
The next chapter covers starting and testing the Hadoop HA cluster.