该教程依赖 Zookeeper 集群,请先查看:
Zookeeper 集群搭建
2. 下载与安装
下载 hadoop,这里选择 3.3.4
点击前往下载
# Extract the Hadoop tarball into /opt/module (creates /opt/module/hadoop-3.3.4)
tar -zxvf /opt/software/hadoop-3.3.4.tar.gz -C /opt/module
# Edit the environment-variable script (files under /etc/profile.d are sourced at login)
vim /etc/profile.d/my_env.sh
# Add the following lines
export HADOOP_HOME=/opt/module/hadoop-3.3.4
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
3. HDFS 高可用
3.1. 配置
配置文件所在路径:/opt/module/hadoop-3.3.4/etc/hadoop
3.1.1. hadoop-env.sh
在文件末尾加入以下内容,这里由于是测试环境,内存给的比较小
# JVM heap for the Hadoop daemons, equivalent to -Xmx / -Xms (unit defaults to MB).
# Deliberately small here because this is a test environment.
export HADOOP_HEAPSIZE_MAX=64
export HADOOP_HEAPSIZE_MIN=64
# Allow the daemons to be launched as root; omit these if not starting as root
HDFS_NAMENODE_USER=root
HDFS_DATANODE_USER=root
HDFS_JOURNALNODE_USER=root
HDFS_ZKFC_USER=root
YARN_RESOURCEMANAGER_USER=root
YARN_NODEMANAGER_USER=root
3.1.2. core-site.xml
<configuration>
  <!-- ZooKeeper quorum used for automatic NameNode failover (ZKFC) -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>mitchell-102:2181,mitchell-103:2181,mitchell-104:2181</value>
  </property>
  <!-- Logical HA nameservice URI; must match dfs.nameservices in hdfs-site.xml -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>
  <!-- Base directory for Hadoop runtime data (referenced as ${hadoop.tmp.dir}) -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/module/hadoop-3.3.4/data</value>
  </property>
  <!-- User shown as the owner when browsing HDFS through the web UI -->
  <property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
  </property>
  <!-- Retry the IPC connection up to 100 times, 10 s apart, to ride out restarts -->
  <property>
    <name>ipc.client.connect.max.retries</name>
    <value>100</value>
  </property>
  <property>
    <name>ipc.client.connect.retry.interval</name>
    <value>10000</value>
  </property>
</configuration>
3.1.3. hdfs-site.xml
<configuration>
  <!-- Logical name of the HA nameservice; referenced by fs.defaultFS -->
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <!-- NameNode IDs participating in the HA cluster -->
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn2,nn3,nn4</value>
  </property>
  <!-- RPC address of each NameNode -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>mitchell-102:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn3</name>
    <value>mitchell-103:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn4</name>
    <value>mitchell-104:8020</value>
  </property>
  <!-- Web UI address of each NameNode -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>mitchell-102:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn3</name>
    <value>mitchell-103:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn4</name>
    <value>mitchell-104:9870</value>
  </property>
  <!-- Quorum Journal Manager URI where NameNodes share edit logs -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://mitchell-102:8485;mitchell-103:8485;mitchell-104:8485/mycluster</value>
  </property>
  <!-- Local storage for NameNode metadata and DataNode blocks.
       NOTE(review): original text said hadoop-3.3.1 while the tutorial installs
       3.3.4 — corrected to 3.3.4 for consistency; adjust if your paths differ. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/hadoop-3.3.4/data/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/hadoop-3.3.4/data/datanode</value>
  </property>
  <!-- JournalNode edit-log storage, under hadoop.tmp.dir from core-site.xml -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>${hadoop.tmp.dir}/journalnode</value>
  </property>
  <!-- Enable ZKFC-driven automatic failover -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- Client-side proxy that locates the currently active NameNode -->
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Fence the old active NameNode over SSH during failover.
       NOTE(review): key path is under /home/hadoop but the daemons run as root
       above — verify the key location matches the actual startup user. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
</configuration>
3.1.4. workers
mitchell-102
mitchell-103
mitchell-104
3.2. 启动和关闭
# [102] Distribute the Hadoop directory to all nodes (xsync is a custom rsync wrapper)
xsync /opt/module/hadoop-3.3.4
# [102, 103, 104] Start a JournalNode on every node — they must be up before formatting
hdfs --daemon start journalnode
# [102] Format the NameNode (first-time setup only)
hdfs namenode -format
# [102] Start the freshly formatted NameNode
hdfs --daemon start namenode
# [103, 104] Bootstrap the standby NameNodes by pulling metadata from the active one
hdfs namenode -bootstrapStandby
# [102] Initialize the HA state in ZooKeeper — the ZooKeeper cluster must already be running
hdfs zkfc -formatZK
# [102] Start / stop the whole HDFS cluster
start-dfs.sh
stop-dfs.sh
# [102] Check that all daemons are running on every node (jpsall is a custom jps wrapper)
jpsall
4. YARN 高可用
4.1. 配置
4.1.1. yarn-site.xml
<configuration>
  <!-- ZooKeeper quorum used for ResourceManager leader election and state storage -->
  <property>
    <name>hadoop.zk.address</name>
    <value>mitchell-102:2181,mitchell-103:2181,mitchell-104:2181</value>
  </property>
  <!-- Required for MapReduce shuffle on the NodeManagers -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Enable ResourceManager high availability -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>cluster-yarn</value>
  </property>
  <!-- ResourceManager IDs participating in HA -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm2,rm3,rm4</value>
  </property>
  <!-- Host of each ResourceManager -->
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>mitchell-102</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm3</name>
    <value>mitchell-103</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm4</name>
    <value>mitchell-104</value>
  </property>
  <!-- Client RPC address (port 8032) of each ResourceManager -->
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>mitchell-102:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm3</name>
    <value>mitchell-103:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm4</name>
    <value>mitchell-104:8032</value>
  </property>
  <!-- Web UI address (port 8088) of each ResourceManager -->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>mitchell-102:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm3</name>
    <value>mitchell-103:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm4</name>
    <value>mitchell-104:8088</value>
  </property>
  <!-- Scheduler address (port 8030) of each ResourceManager -->
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>mitchell-102:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm3</name>
    <value>mitchell-103:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm4</name>
    <value>mitchell-104:8030</value>
  </property>
  <!-- NodeManager heartbeat address (port 8031) of each ResourceManager -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>mitchell-102:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
    <value>mitchell-103:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm4</name>
    <value>mitchell-104:8031</value>
  </property>
  <!-- Recover running applications after a ResourceManager restart/failover,
       persisting state in ZooKeeper -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
</configuration>
4.2. 启动和关闭
# Distribute the updated configuration files to all nodes
xsync /opt/module/hadoop-3.3.4/etc
# Start / stop the YARN cluster
start-yarn.sh
stop-yarn.sh
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)