Hadoop 3 High-Availability Cluster

1. Dependencies

This tutorial depends on a ZooKeeper cluster; please set that up first, see:

ZooKeeper Cluster Setup

2. Download and Installation

Download Hadoop; version 3.3.4 is used here.

Click here to go to the download page.
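If you prefer to fetch the tarball from the command line, one way (assuming you use the Apache archive and the /opt/software directory used in the next step) is:

# Download hadoop-3.3.4 into /opt/software
wget -P /opt/software https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz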

# Extract the archive
tar -zxvf /opt/software/hadoop-3.3.4.tar.gz -C /opt/module

# Configure environment variables
vim /etc/profile.d/my_env.sh

# Add the following content
export HADOOP_HOME=/opt/module/hadoop-3.3.4
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
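
After writing the file, reload the environment and verify that the hadoop command resolves (a quick check, assuming /etc/profile picks up the files under /etc/profile.d):

# Reload the environment variables and verify the installation
source /etc/profile.d/my_env.sh
hadoop version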
3. HDFS High Availability

3.1. Configuration

The configuration files are located in: /opt/module/hadoop-3.3.4/etc/hadoop

3.1.1. hadoop-env.sh

Append the following at the end of the file. Since this is a test environment, the heap sizes are deliberately small.

# Configure the JVM heap size, equivalent to Xmx and Xms; the default unit is MB
export HADOOP_HEAPSIZE_MAX=64
export HADOOP_HEAPSIZE_MIN=64

# Set the startup user to root; these can be omitted if you do not start the daemons as root
HDFS_NAMENODE_USER=root
HDFS_DATANODE_USER=root
HDFS_JOURNALNODE_USER=root
HDFS_ZKFC_USER=root
YARN_RESOURCEMANAGER_USER=root
YARN_NODEMANAGER_USER=root
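
Once the daemons are running (section 3.2), you can confirm the heap settings took effect by inspecting the JVM arguments of the processes. A minimal check, assuming the JDK's jps tool is on the PATH; an -Xmx/-Xms value of 64m should show up:

# Inspect the JVM arguments of the running NameNode
jps -lvm | grep -i namenode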
3.1.2. core-site.xml


<configuration>

    <!-- ZooKeeper quorum used by the ZKFC for automatic failover -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>mitchell-102:2181,mitchell-103:2181,mitchell-104:2181</value>
    </property>

    <!-- Default file system URI; points to the HDFS nameservice defined in hdfs-site.xml -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://mycluster</value>
    </property>

    <!-- Base directory for Hadoop data files -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/module/hadoop-3.3.4/data</value>
    </property>

    <!-- Static user for the HDFS web UI -->
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>root</value>
    </property>

    <!-- Maximum number of IPC client connection retries -->
    <property>
        <name>ipc.client.connect.max.retries</name>
        <value>100</value>
    </property>

    <!-- Interval between IPC client connection retries, in milliseconds -->
    <property>
        <name>ipc.client.connect.retry.interval</name>
        <value>10000</value>
    </property>

</configuration>
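
Both the HDFS ZKFC and the YARN ResourceManagers below rely on the ZooKeeper quorum configured here, so it is worth confirming that all three ZooKeeper servers are up before continuing. A minimal check, assuming zkServer.sh is on the PATH of each host for non-interactive SSH sessions:

# Check the ZooKeeper quorum (expect one leader and two followers)
for host in mitchell-102 mitchell-103 mitchell-104; do
    ssh $host "zkServer.sh status"
done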
3.1.3. hdfs-site.xml


<configuration>

    <!-- Logical name of the nameservice -->
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>

    <!-- NameNode IDs within the nameservice -->
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn2,nn3,nn4</value>
    </property>

    <!-- RPC address of each NameNode -->
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>mitchell-102:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn3</name>
        <value>mitchell-103:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn4</name>
        <value>mitchell-104:8020</value>
    </property>

    <!-- HTTP (web UI) address of each NameNode -->
    <property>
        <name>dfs.namenode.http-address.mycluster.nn2</name>
        <value>mitchell-102:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn3</name>
        <value>mitchell-103:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn4</name>
        <value>mitchell-104:9870</value>
    </property>

    <!-- JournalNode quorum that stores the shared edit log -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://mitchell-102:8485;mitchell-103:8485;mitchell-104:8485/mycluster</value>
    </property>

    <!-- Local directory for NameNode metadata -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop-3.3.1/data/namenode</value>
    </property>

    <!-- Local directory for DataNode blocks -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop-3.3.1/data/datanode</value>
    </property>

    <!-- Local directory for JournalNode edit logs -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>${hadoop.tmp.dir}/journalnode</value>
    </property>

    <!-- Enable automatic failover via ZKFC -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <!-- Proxy provider that HDFS clients use to find the active NameNode -->
    <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <!-- Fencing method used during failover -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>

    <!-- SSH private key used by sshfence -->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
    </property>

    <!-- SSH connection timeout for fencing, in milliseconds -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>

</configuration>
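
The sshfence method only works if the host running the active NameNode can SSH to the other NameNode hosts without a password using the configured private key. A quick sanity check (assuming the daemons run as root, as set in hadoop-env.sh, and that /home/hadoop/.ssh/id_rsa is the key actually authorized on the other hosts):

# From each NameNode host, verify passwordless SSH with the fencing key
ssh -i /home/hadoop/.ssh/id_rsa -o BatchMode=yes root@mitchell-103 "hostname"
ssh -i /home/hadoop/.ssh/id_rsa -o BatchMode=yes root@mitchell-104 "hostname"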
3.1.4. workers

List one worker (DataNode / NodeManager) host per line:
mitchell-102
mitchell-103
mitchell-104
3.2. Startup and Shutdown
# [102] Distribute Hadoop to the other nodes (xsync is a custom cluster-sync helper script)
xsync /opt/module/hadoop-3.3.4

# [102, 103, 104] Start the JournalNodes
hdfs --daemon start journalnode

# [102] Format the NameNode
hdfs namenode -format

# [102] Start the NameNode
hdfs --daemon start namenode

# [103, 104] The other NameNodes pull the metadata (bootstrap as standby)
hdfs namenode -bootstrapStandby

# [102] Initialize the ZKFC state in ZooKeeper; the ZooKeeper cluster must be running first
hdfs zkfc -formatZK

# [102] Start and stop the whole HDFS cluster
start-dfs.sh
stop-dfs.sh

# [102] Check whether the cluster started successfully (jpsall is a custom helper that runs jps on every node)
jpsall
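
To confirm that automatic failover is wired up, query the HA state of each NameNode ID configured in hdfs-site.xml; exactly one should report active and the others standby:

# [102] Query the HA state of each NameNode
hdfs haadmin -getServiceState nn2
hdfs haadmin -getServiceState nn3
hdfs haadmin -getServiceState nn4

# [102] Optional: report overall capacity and live DataNodes
hdfs dfsadmin -report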
4. YARN High Availability

4.1. Configuration

4.1.1. yarn-site.xml

<configuration>

    <!-- ZooKeeper quorum used for ResourceManager HA and state storage -->
    <property>
        <name>hadoop.zk.address</name>
        <value>mitchell-102:2181,mitchell-103:2181,mitchell-104:2181</value>
    </property>

    <!-- Auxiliary service required by MapReduce shuffle -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <!-- Enable ResourceManager HA -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>

    <!-- Logical ID of the YARN cluster -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>cluster-yarn</value>
    </property>

    <!-- ResourceManager IDs -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm2,rm3,rm4</value>
    </property>

    <!-- Hostname of each ResourceManager -->
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>mitchell-102</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm3</name>
        <value>mitchell-103</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm4</name>
        <value>mitchell-104</value>
    </property>

    <!-- Client RPC address of each ResourceManager -->
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>mitchell-102:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm3</name>
        <value>mitchell-103:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm4</name>
        <value>mitchell-104:8032</value>
    </property>

    <!-- Web UI address of each ResourceManager -->
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>mitchell-102:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm3</name>
        <value>mitchell-103:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm4</name>
        <value>mitchell-104:8088</value>
    </property>

    <!-- Scheduler address of each ResourceManager -->
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>mitchell-102:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm3</name>
        <value>mitchell-103:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm4</name>
        <value>mitchell-104:8030</value>
    </property>

    <!-- Resource-tracker address (used by the NodeManagers) of each ResourceManager -->
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>mitchell-102:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
        <value>mitchell-103:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm4</name>
        <value>mitchell-104:8031</value>
    </property>

    <!-- Enable ResourceManager state recovery after restart or failover -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>

    <!-- Store ResourceManager state in ZooKeeper -->
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>

</configuration>
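
With recovery enabled and ZKRMStateStore configured, the ResourceManager persists its state in ZooKeeper. Once YARN is running you can inspect it; this is a sketch assuming the default parent path /rmstore and that ZooKeeper's zkCli.sh is available:

# Inspect the ResourceManager state stored in ZooKeeper
zkCli.sh -server mitchell-102:2181 ls /rmstore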
4.2. Startup and Shutdown
# Distribute the configuration files
xsync /opt/module/hadoop-3.3.4/etc

# Start and stop YARN
start-yarn.sh
stop-yarn.sh
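
To confirm ResourceManager HA, query the state of each RM ID configured in yarn-site.xml; one should be active and the others standby:

# Query the HA state of each ResourceManager
yarn rmadmin -getServiceState rm2
yarn rmadmin -getServiceState rm3
yarn rmadmin -getServiceState rm4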
