Install standalone Hadoop
Create a user and configure passwordless login
Create the hadoop user
sudo useradd -m hadoop -s /bin/bash
Set its password
sudo passwd hadoop
Grant it administrator privileges
sudo adduser hadoop sudo
Try logging in to localhost with the password
ssh localhost
Log out of localhost
exit
Configure passwordless login
cd ~/.ssh/                               # this directory was created by the earlier ssh localhost
ssh-keygen -t rsa                        # press Enter at every prompt to accept the defaults
cat ./id_rsa.pub >> ./authorized_keys    # authorize the new key for login to this machine
Try ssh-ing into localhost again; if no password is required, the setup succeeded
ssh localhost
Install the Java environment
Create the directories
cd /usr/lib
sudo mkdir jvm
cd ~
sudo mkdir Downloads # if it does not already exist (the GNOME desktop edition creates it by default)
cd Downloads
Transfer the JDK package
sudo rz -e                               # receive the package from the local machine (lrzsz / ZMODEM)
sudo sz jdk-8u162-linux-x64.tar.gz       # send a file back to the local machine, if needed
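rz/sz require the lrzsz package and a terminal that speaks ZMODEM. If that is not available, for example when the work is done inside a Docker container as in the later sections, one alternative is to copy the package in from the host with docker cp; the container name below is a placeholder:
docker cp jdk-8u162-linux-x64.tar.gz <container-name>:/home/hadoop/Downloads/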
Extract the JDK
sudo tar -zxvf ./jdk-8u162-linux-x64.tar.gz -C /usr/lib/jvm
Set the environment variables
cd ~
vim ~/.bashrc
# add the following at the beginning of the file
export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_162
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
source ~/.bashrc
Check whether the installation succeeded
java -version
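If the environment variables are picked up correctly, the output should report the version that was just unpacked, something like:
java version "1.8.0_162"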
Install Hadoop
Transfer the file
cd Downloads
sudo rz -e
sudo sz hadoop-3.1.3.tar.gz
Extract the package
sudo tar -zxf ~/Downloads/hadoop-3.1.3.tar.gz -C /usr/local
Rename the directory
cd /usr/local/
sudo mv ./hadoop-3.1.3/ ./hadoop
Change the file ownership
sudo chown -R hadoop ./hadoop
Check whether the installation succeeded
cd /usr/local/hadoop
./bin/hadoop version
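If everything is in place, the first line of the output should be:
Hadoop 3.1.3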
Package the container into an image
Find the container ID
docker ps
Create the image
docker commit <container-id> <image-name>
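For example, with a hypothetical container ID and image name (take the real ID from docker ps), the command looks like:
docker commit a1b2c3d4e5f6 hadoop-standalone:1.0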
Pseudo-distributed Hadoop
Modify the configuration
cd /usr/local/hadoop
vim ./etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
vim ./etc/hadoop/hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop/tmp/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop/tmp/dfs/data</value>
    </property>
</configuration>
Format the NameNode
cd /usr/local/hadoop
./bin/hdfs namenode -format
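If the format succeeds, the output should contain a line roughly like the following (the exact log prefix varies):
INFO common.Storage: Storage directory /usr/local/hadoop/tmp/dfs/name has been successfully formatted.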
Start the Hadoop services
cd /usr/local/hadoop
./sbin/start-dfs.sh
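To confirm the daemons came up, jps (shipped with the JDK installed above) should list the NameNode, DataNode and SecondaryNameNode processes:
jps    # should list NameNode, DataNode and SecondaryNameNode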
Stop the Hadoop services
cd /usr/local/hadoop
./sbin/stop-dfs.sh
Fully distributed Hadoop
master
Create new containers (master / slave)
docker run -p <host-port>:<container-port> --privileged -ti --net staticnet --ip <static-ip> --name=<container-name> <image-name> /bin/bash
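A concrete invocation might look like the following; the port, IP address and names are placeholders for illustration, and it assumes the staticnet network has already been created:
docker run -p 9870:9870 --privileged -ti --net staticnet --ip 172.18.0.2 --name=master hadoop-standalone:1.0 /bin/bash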
Start the SSH service
service ssh restart
Modify the workers file
cd /usr/local/hadoop/etc/hadoop
vim workers
Inside the file, delete localhost and add Slave1.
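After the edit, the workers file should contain only the slave hostname:
Slave1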
Modify core-site.xml
vim core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://Master:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>
</configuration>
Modify hdfs-site.xml
vim hdfs-site.xml
<configuration>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>Master:50090</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value> <!-- replication factor; 1 here, matching the single DataNode in the cluster -->
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop/tmp/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop/tmp/dfs/data</value>
    </property>
</configuration>
Rename mapred-site.xml.template to mapred-site.xml (the Hadoop 3.1.3 distribution usually ships mapred-site.xml directly, so this step is only needed when only the .template file is present)
sudo mv ./mapred-site.xml.template ./mapred-site.xml
Modify the mapred-site.xml configuration
vim mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>Master:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>Master:19888</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
    </property>
</configuration>
Modify yarn-site.xml
vim yarn-site.xml
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>Master</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
Clear the temporary files generated in pseudo-distributed mode, then pack up Hadoop and copy it to Slave1
cd /usr/local
sudo rm -r ./hadoop/tmp
sudo rm -r ./hadoop/logs/*
tar -zcf ~/hadoop.master.tar.gz ./hadoop
cd ~
scp ./hadoop.master.tar.gz Slave1:/home/hadoop
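The archive still has to be unpacked on the slave; a minimal sketch of the slave-side steps (run as the hadoop user on Slave1) is:
sudo rm -r /usr/local/hadoop           # remove any old copy first, if present
sudo tar -zxf ~/hadoop.master.tar.gz -C /usr/local
sudo chown -R hadoop /usr/local/hadoop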