hostname | hp5 | hp6 | hp7 |
---|---|---|---|
IP | 10.31.1.119 | 10.31.1.120 | 10.31.1.121 |
Memory | 8 GB | 8 GB | 8 GB |
username | root | root | root |
IP | hostname | roles |
---|---|---|
10.31.1.119 | hp5 | NameNode SecondaryNameNode DataNode ResourceManager NodeManager |
10.31.1.120 | hp6 | DataNode NodeManager |
10.31.1.121 | hp7 | DataNode NodeManager |
Software | Version |
---|---|
CentOS | 7.8 |
Java | JDK 8 |
Hadoop | 3.3.2 |
We initially used OpenJDK 11, but ran into problems later when installing Hive and Hudi; switching back to JDK 8 resolved them.
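For reference, a minimal sketch of putting JDK 8 in place under the path used later in hadoop-env.sh; the tarball name jdk-8u211-linux-x64.tar.gz and the /home/software download location are assumptions:
mkdir -p /usr/local/java
tar -zxvf /home/software/jdk-8u211-linux-x64.tar.gz -C /usr/local/java # assumed tarball name/location
/usr/local/java/jdk1.8.0_211/bin/java -version # should print java version "1.8.0_211"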
yum install -y epel-release
yum install -y net-tools
yum install -y vim
hostname hp5 #set the hostname for the current session (use hp6/hp7 on the other nodes)
vi /etc/hostname #set the hostname permanently
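Alternatively, on CentOS 7 hostnamectl updates both the running hostname and /etc/hostname in one step:
hostnamectl set-hostname hp5 # run the matching command (hp6/hp7) on each node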
#add all three entries to /etc/hosts on every node
10.31.1.119 hp5
10.31.1.120 hp6
10.31.1.121 hp7
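A quick check that the new entries resolve:
ping -c 1 hp6
ping -c 1 hp7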
systemctl stop firewalld #stop the firewall
systemctl disable firewalld #keep it disabled after reboot
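To confirm the firewall is really off:
systemctl status firewalld # should report "inactive (dead)"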
#run on every node (all nodes)
ssh-keygen -t rsa #press Enter at every prompt
#append the generated public key to the authorized keys (master node)
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
#copy the public key to the 2 worker nodes (master node)
scp ~/.ssh/id_rsa.pub root@hp6:~/
scp ~/.ssh/id_rsa.pub root@hp7:~/
#on each worker node, append the master's public key to the authorized keys (non-master nodes)
cat ~/id_rsa.pub >> ~/.ssh/authorized_keys
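A quick check from the master that passwordless login works:
ssh hp6 hostname # should print hp6 without prompting for a password
ssh hp7 hostname # should print hp7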
yum -y install ntpdate
#synchronize the time once
ntpdate -u ntp.sjtu.edu.cn
#scheduled sync: run at the top of every hour
vi /etc/crontab
0 */1 * * * root ntpdate -u ntp.sjtu.edu.cn
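A quick sanity check, relying on the passwordless SSH configured above, that crond is running and the clocks now agree:
systemctl status crond # crond re-reads /etc/crontab automatically
for h in hp5 hp6 hp7; do ssh $h date; done # times should match across nodes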
cd /home/software
#download
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.3.2/hadoop-3.3.2.tar.gz --no-check-certificate
#extract
tar -zxvf hadoop-3.3.2.tar.gz -C /home
vi /etc/profile
export HADOOP_HOME=/home/hadoop-3.3.2
source /etc/profile
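With HADOOP_HOME set, a quick check that the unpacked distribution works:
/home/hadoop-3.3.2/bin/hadoop version # should report Hadoop 3.3.2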
cd /home/hadoop-3.3.2/etc/hadoop/
vi hadoop-env.sh
#add the following
export JAVA_HOME=/usr/local/java/jdk1.8.0_211
export HADOOP_HOME=/home/hadoop-3.3.2
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
export HADOOP_PID_DIR=${HADOOP_HOME}/pid
vi core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hp5:8020</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop_repo</value>
  </property>
</configuration>
vi hdfs-site.xml
<configuration>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hp5:50090</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/home/hadoop-3.3.2/tmp/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/home/hadoop-3.3.2/tmp/dfs/data</value>
  </property>
</configuration>
vi mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hp5:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hp5:19888</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=/home/hadoop-3.3.2</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=/home/hadoop-3.3.2</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=/home/hadoop-3.3.2</value>
  </property>
</configuration>
vi yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hp5</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://hp5:19888/jobhistory/logs/</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>8192</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>8</value>
  </property>
  <!-- minimum memory allocated to each container -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>128</value>
  </property>
</configuration>
#list every DataNode/NodeManager host in the workers file
vi workers
hp5
hp6
hp7
cd /home/hadoop-3.3.2/sbin
# add the following at the very top of each script
vi start-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vi stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vi start-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
vi stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
#copy the Hadoop directory to the other nodes
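A minimal sketch of the copy, assuming the same /home layout on all three nodes:
scp -r /home/hadoop-3.3.2 root@hp6:/home/
scp -r /home/hadoop-3.3.2 root@hp7:/home/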
#format the NameNode
cd /home/hadoop-3.3.2/bin
./hdfs namenode -format
#the following message indicates the format succeeded
common.Storage: Storage directory /home/hadoop_repo/dfs/name has been successfully formatted.
cd /home/hadoop-3.3.2/sbin
./start-all.sh #start the cluster
./stop-all.sh #stop the cluster
cd /home/hadoop-3.3.2/bin/
./mapred --daemon start historyserver #start the JobHistory server
#processes started on each node
#jps on the master node
NameNode
SecondaryNameNode
DataNode
ResourceManager
NodeManager
JobHistoryServer
#jps on the worker nodes
DataNode
NodeManager
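To check all three nodes in one pass (assumes jps is on the PATH for non-interactive SSH sessions):
for h in hp5 hp6 hp7; do echo "== $h =="; ssh $h jps; done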
#view cluster resource information (YARN ResourceManager web UI)
http://10.31.1.119:8088/
#view HDFS storage and DataNode information (NameNode web UI)
http://10.31.1.119:9870/
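If no browser can reach the VMs, the UIs can be probed from the shell; curl exits 0 whenever the HTTP endpoint answers:
curl -s -o /dev/null http://10.31.1.119:9870/ && echo "NameNode UI reachable"
curl -s -o /dev/null http://10.31.1.119:8088/ && echo "ResourceManager UI reachable"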
#view the aggregated logs of a YARN application
cd /home/hadoop-3.3.2/bin/
./yarn logs -applicationId application_1639553331593_0001
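As an end-to-end smoke test, the examples jar bundled with the distribution submits a real MapReduce job; the applicationId it prints can then be fed to the yarn logs command above:
cd /home/hadoop-3.3.2
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.2.jar pi 2 10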