一个80后
程序员的笔记

[Centos7]安装Hadoop 3.2

1、准备环境:

# Disable the firewall (test environment only; in production open just the required ports)
systemctl stop firewalld
systemctl disable firewalld
# Set up passwordless SSH login, non-interactively (empty passphrase, fixed key path)
ssh-keygen -t rsa -N '' -f /root/.ssh/id_rsa
# Append the public key to authorized_keys — append, don't overwrite,
# so any keys already authorized on this host are preserved
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys  # sshd ignores authorized_keys with loose permissions
# Verify passwordless login works
ssh localhost
# Register the cluster nodes in /etc/hosts (append; these names are used
# throughout the Hadoop configuration below)
cat >> /etc/hosts <<'EOF'
192.168.40.50 master
192.168.40.51 data01
192.168.40.52 data02
EOF

2、安装java(本文假设 JDK 1.8 已安装到 /usr/local/jdk1.8,与后文 hadoop-env.sh 中配置的 JAVA_HOME 保持一致)

3、Hadoop 安装

# Download from: http://hadoop.apache.org/
# Unpack Hadoop into /usr/local
tar -zxvf hadoop-3.2.0.tar.gz -C /usr/local
# BUG FIX: the original `ln -sf hadoop-3.2.0 hadoop` created the symlink in
# the *current* directory, so /usr/local/hadoop never existed. Use absolute
# paths (-n avoids descending into an existing link target directory).
ln -sfn /usr/local/hadoop-3.2.0 /usr/local/hadoop

# Append the Hadoop environment variables to /etc/profile
# (quoted 'EOF' so $HADOOP_HOME/$PATH are written literally, not expanded now)
cat >> /etc/profile <<'EOF'
#Hadoop
export HADOOP_HOME=/usr/local/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
EOF

source /etc/profile

3.1、修改 Hadoop 配置文件

/usr/local/hadoop/etc/hadoop/hadoop-env.sh

vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
# Change to the following settings. This is a test setup only — in production,
# create a dedicated service user instead of running the daemons as root.
export JAVA_HOME=/usr/local/jdk1.8
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_NAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root

/usr/local/hadoop/etc/hadoop/core-site.xml

vim /usr/local/hadoop/etc/hadoop/core-site.xml
#修改为如下配置
<configuration>
    <!-- RPC address the NameNode (HDFS master) listens on; clients and
         DataNodes connect here. "master" must resolve via /etc/hosts. -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
    <!-- Base directory for files Hadoop generates at runtime -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/tmp</value>
    </property>
    <!-- I/O buffer size in bytes (131072 = 128 KB) -->
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
</configuration>

/usr/local/hadoop/etc/hadoop/hdfs-site.xml

vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
#修改为如下配置
<configuration>
    <!-- Number of HDFS block replicas (matches the two data nodes here) -->
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <!-- Disable HDFS permission checks — convenient for testing,
         not recommended in production -->
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <!-- Local directory where the NameNode stores its metadata (fsimage/edits) -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/data/hadoop/dfs/name</value>
    </property>
    <!-- Local directory where DataNodes store block data -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/data/hadoop/dfs/data</value>
    </property>
    <!-- NameNode HTTP (web UI) address.
         NOTE(review): 50070 was the Hadoop 2.x default; Hadoop 3.x defaults
         to 9870. Setting it explicitly keeps the old port — confirm intended. -->
    <property>
        <name>dfs.namenode.http-address</name>
        <value>master:50070</value>
    </property>
    <!-- SecondaryNameNode HTTP address (Hadoop 3.x default is 9868) -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:50090</value>
    </property>
</configuration>

/usr/local/hadoop/etc/hadoop/yarn-site.xml

vim /usr/local/hadoop/etc/hadoop/yarn-site.xml 
#修改为如下配置
<configuration>

<!-- Site specific YARN configuration properties -->
    <!-- Which node the ResourceManager runs on -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.nodemanager.hostname</name>
        <value>master</value>
    </property>
    <!-- Auxiliary service so reducers can fetch map output (mapreduce_shuffle) -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Total physical memory (MB) this NodeManager may allocate to containers -->
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>6144</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.resource.memory-mb</name>
        <value>512</value>
        <description>设置application master 容器请求的内存的值,以MB为单位。默认1536</description>
    </property>
    <!-- NOTE(review): "mapreduce.map.resource.memory-mb" and
         "mapreduce.reduce.resource.memory-mb" do not look like standard Hadoop
         property names; the standard keys (mapreduce.map.memory.mb /
         mapreduce.reduce.memory.mb) are already set in mapred-site.xml below,
         so these two entries are likely ignored — verify against the docs. -->
    <property>
        <name>mapreduce.map.resource.memory-mb</name>
        <value>256</value>
        <description>设置所有map任务的Container的请求memory,以MB为单位。默认值1024</description>
    </property>
    <property>
        <name>mapreduce.reduce.resource.memory-mb</name>
        <value>512</value>
        <description>设置所有reduce任务Containers请求的memory的值,以MB为单位。默认1024</description>
    </property>
    <!-- Smallest / largest container allocation the scheduler will grant (MB) -->
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>64</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>6144</value>
    </property>
    <!-- NOTE(review): this CapacityScheduler property expects a fraction
         between 0 and 1 (default 0.1) and is normally set in
         capacity-scheduler.xml, not yarn-site.xml; "100" probably intends
         "allow all resources for AMs" (i.e. 1) — confirm. -->
    <property>
        <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
        <value>100</value>
    </property>
    <!-- Classpath handed to YARN application containers; must match the
         HADOOP_HOME layout configured in /etc/profile -->
    <property>
        <name>yarn.application.classpath</name>
        <value>
            /usr/local/hadoop/etc/hadoop,
            /usr/local/hadoop/share/hadoop/common/*,
            /usr/local/hadoop/share/hadoop/common/lib/*,
            /usr/local/hadoop/share/hadoop/hdfs/*,
            /usr/local/hadoop/share/hadoop/hdfs/lib/*,
            /usr/local/hadoop/share/hadoop/mapreduce/*,
            /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
            /usr/local/hadoop/share/hadoop/yarn/*,
            /usr/local/hadoop/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>

/usr/local/hadoop/etc/hadoop/mapred-site.xml

vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
 #修改为如下配置
<configuration>
    <!-- Tell the MapReduce framework to run on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- NOTE(review): yarn.nodemanager.vmem-check-enabled is a NodeManager
         property read from yarn-site.xml; placed here in mapred-site.xml it
         likely has no effect — confirm and move to yarn-site.xml if needed. -->
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
        <description>Whether virtual memory limits will be enforced for containers</description>
    </property>
    <!-- Additional memory settings -->
    <!-- Container size (MB) requested for the MapReduce ApplicationMaster -->
    <property>  
        <name>yarn.app.mapreduce.am.resource.mb</name>  
        <value>512</value>
    </property>
    <!-- JVM heap for the AM: 409m ≈ 80% of the 512 MB container,
         leaving headroom for non-heap memory -->
    <property> 
        <name>yarn.app.mapreduce.am.command-opts</name> 
        <value>-Xmx409m</value>
    </property>
    <!-- Container sizes (MB) for map and reduce tasks -->
    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>512</value>
    </property>
    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>512</value>
    </property>
    <!-- Task JVM heaps, again ~80% of the container size -->
    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx409m</value>
    </property>
    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx409m</value>
    </property>
</configuration>

/usr/local/hadoop/etc/hadoop/hdfs-site.xml

vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
#修改为如下配置
<!-- NOTE(review): this block is a verbatim duplicate of the hdfs-site.xml
     configuration shown earlier in the article — probably a copy/paste
     leftover; confirm whether a different file was intended here. -->
<configuration>
    <!-- Number of HDFS block replicas -->
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <!-- Disable HDFS permission checks (test setup only) -->
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <!-- NameNode metadata directory -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/data/hadoop/dfs/name</value>
    </property>
    <!-- DataNode block storage directory -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/data/hadoop/dfs/data</value>
    </property>
    <!-- NameNode HTTP (web UI) address; 50070 is the Hadoop 2.x default port -->
    <property>
        <name>dfs.namenode.http-address</name>
        <value>master:50070</value>
    </property>
    <!-- SecondaryNameNode HTTP address -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:50090</value>
    </property>
</configuration>

3.2、如果需要多台架构的话,克隆机器

# Set the hostname so it matches the names registered in /etc/hosts earlier
# (the hosts file uses "data01"/"data02"; the original "hadoop-data01" would
# not resolve against those entries — use "data02" on the second clone)
hostnamectl set-hostname data01
# Give each clone a unique static IP (edit IPADDR; adjust the interface name
# ens192 to match this machine)
vim /etc/sysconfig/network-scripts/ifcfg-ens192
systemctl restart network

3.3、配置启动hadoop

# Format the HDFS NameNode metadata directory — first start only;
# re-running this wipes all existing cluster metadata
/usr/local/hadoop/bin/hdfs namenode -format
# Start the HDFS and YARN daemons separately
# (start-all.sh is deprecated in Hadoop 3.x)
/usr/local/hadoop/sbin/start-dfs.sh
/usr/local/hadoop/sbin/start-yarn.sh
赞(1) 打赏
未经允许不得转载:FoolTiger笔记本 » [Centos7]安装Hadoop 3.2
分享到: 更多 (0)

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏