Big Data Component Installation Notes (Compilation)
Big Data Component Installation Notes
Shell scripts
Script to check the JVM processes on every machine
#!/bin/bash
echo "-----------------cdh01-------------------------"
/opt/apps/jdk1.8.0_45/bin/jps
echo ""
for((i=2;i<=3;i++)){
echo "-----------------cdh0$i------------------------"
ssh cdh0$i "/opt/apps/jdk1.8.0_45/bin/jps"
echo ""
}
Script to start/stop ZooKeeper across multiple machines
chmod +x myzkserver.sh
vi myzkserver.sh
#!/bin/bash
case $1 in
start)
for myhost in cdh01 cdh02 cdh03
do
echo "---------------$myhost---------------"
ssh $myhost > /dev/null 2>&1 << eof
/opt/apps/zookeeper-3.4.5-cdh5.7.6/bin/zkServer.sh start
exit
eof
done
;;
status)
for((i=1;i<=3;i++)){
echo "---------------cdh0$i---------------"
ssh -Tq cdh0$i 2>/dev/null << eof
/opt/apps/zookeeper-3.4.5-cdh5.7.6/bin/zkServer.sh status
exit
eof
}
;;
stop)
for((i=1;i<=3;i++)){
echo "---------------cdh0$i---------------"
ssh cdh0$i "/opt/apps/zookeeper-3.4.5-cdh5.7.6/bin/zkServer.sh stop" >/dev/null 2>&1
}
;;
esac
/root/bin/jps.sh
VM environment variables (/etc/profile)
#environment
#export JAVA_HOME=/opt/apps/jdk1.8.0_45
export JAVA_HOME=/opt/apps/jdk1.8.0_261
export HADOOP_HOME=/opt/apps/hadoop-2.8.1
#export HADOOP_HOME=/opt/apps/hadoop-2.7.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
#export HIVE_HOME=/opt/apps/hive-1.1.0-cdh5.7.6
export HIVE_HOME=/opt/apps/hive-1.2.1
export HBASE_HOME=/opt/apps/hbase-1.2.0-cdh5.7.6
export ZOOKEEPER_HOME=/opt/apps/zookeeper-3.4.10
export FLUME_HOME=/opt/apps/flume-1.6.0-cdh5.7.6
#export SQOOP_HOME=/opt/apps/sqoop-1.4.6-cdh5.7.6
export SQOOP_HOME=/opt/apps/sqoop-1.4.7
export AZKABAN_HOME=/opt/apps/azkaban-solo-server-0.1.0
export ANT_HOME=/opt/apps/ant-1.8.1
export M2_HOME=/opt/apps/maven-3.3.9
export SCALA_HOME=/opt/apps/scala-2.11.8
export SPARK_HOME=/opt/apps/spark-2.2.0
export OOZIE_HOME=/opt/apps/oozie-4.1.0-cdh5.7.6
export KAFKA_HOME=/opt/apps/kafka-2.11
export REDIS_HOME=/opt/apps/redis-3.2.8
export REDIS_CONF=$REDIS_HOME/conf
export ELASTICSEARCH_HOME=/opt/apps/elasticsearch-6.5.3
export FLINK_HOME=/opt/apps/flink-1.9.1
export KIBANA_HOME=/opt/apps/kibana-6.5.3
export PRESTO_HOME=/opt/apps/presto-server
export PATH=$PATH:$KIBANA_HOME/bin:$FLINK_HOME/bin:$OOZIE_HOME/bin
#export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export CLASSPATH=.:$JAVA_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$HBASE_HOME/bin
export PATH=$PATH:$ZOOKEEPER_HOME/bin:$FLUME_HOME/bin:$SQOOP_HOME/bin:$AZKABAN_HOME/bin
export PATH=$PATH:$ANT_HOME/bin:$M2_HOME/bin:$SCALA_HOME/bin:$SPARK_HOME/sbin:$SPARK_HOME/bin
export PATH=$PATH:$KAFKA_HOME/bin:$REDIS_HOME/bin:$ELASTICSEARCH_HOME/bin:$PRESTO_HOME/bin
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/apps/hbase-1.2.0-cdh5.7.6/lib/*
1. VM setup
1.1 Configure a static IP at first boot
vi /etc/sysconfig/network-scripts/ifcfg-ens33
BOOTPROTO="static" # use a static IP; the default is dhcp
IPADDR="192.168.10.101" # the static IP address
NETMASK="255.255.255.0" # subnet mask
GATEWAY="192.168.10.2" # gateway address
DNS1=8.8.8.8 # DNS servers
DNS2=8.8.4.4
DNS3=114.114.114.114
ONBOOT="yes" # bring the interface up at boot
1.2 Restart the network service
systemctl restart network
1.3 Change the hostname (or edit /etc/hostname)
hostnamectl set-hostname cdh01
1.4 Check the local IP, ping, then reboot the VM
ip addr
# ping an external site and the host machine's IP
ping www.baidu.com
ping <host machine IP>
reboot
1.5 Install common tools
yum -y install ifconfig      # fails: ifconfig is provided by net-tools, not a package of its own
yum search ifconfig
# install directly
yum -y install net-tools.x86_64
yum -y install vim
1.6 Install the NTP client and server
yum -y install ntpdate
yum -y install ntp
# sync the clock once with ntpdate:
ntpdate -u time.windows.com
systemctl start ntpd
systemctl enable ntpd
1.8 Disable the firewall + configure the hosts file
Disable:
systemctl stop firewalld
systemctl disable firewalld
Enable:
systemctl enable firewalld
systemctl status firewalld
# disable NetworkManager if needed
Common management commands:
systemctl status NetworkManager
systemctl start NetworkManager
systemctl stop NetworkManager
systemctl disable NetworkManager
systemctl enable NetworkManager
vi /etc/hosts
192.168.10.101 cdh01
192.168.10.102 cdh02
192.168.10.103 cdh03
1.9 Clone the VM and change its IP and hostname, as sketched below
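A minimal sketch of the changes to make on a clone, assuming the interface name ens33 and the cdh02 address from the hosts table above:
# run on the cloned VM (example values for cdh02)
sed -i 's/192.168.10.101/192.168.10.102/' /etc/sysconfig/network-scripts/ifcfg-ens33
hostnamectl set-hostname cdh02
systemctl restart network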
2. Install Hadoop and the JDK
Pseudo-distributed installation
1. Unpack the JDK and Hadoop and set the environment variables
2. Configure passwordless SSH
ssh-keygen -t rsa
ssh-copy-id root@localhost
ssh localhost
ssh 0.0.0.0
3. Configuration files
3.1 hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/opt/apps/jdk1.8.0_261
3.2hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/apps/hadoop-2.8.1/hdpdata/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/apps/hadoop-2.8.1/hdpdata/hdfs/data</value>
</property>
</configuration>
3.3core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://zhuguofu:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/apps/hadoop-2.8.1/hdpdata</value>
</property>
</configuration>
3.4yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>zhuguofu</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.scheduler.fair.preemption</name>
<value>true</value>
</property>
<property>
<name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
<value>1.0</value>
</property>
</configuration>
3.5mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>zhuguofu:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>zhuguofu:19888</value>
</property>
</configuration>
3.6 Create the data directories
mkdir -p /opt/apps/hadoop-2.8.1/hdpdata/hdfs/name
mkdir -p /opt/apps/hadoop-2.8.1/hdpdata/hdfs/data
3.7 Format the NameNode
hdfs namenode -format
Fully distributed installation
2.1 Unpack and install Hadoop and the JDK
tar -zxvf hadoop-2.6.0-cdh5.7.6.tar.gz -C /opt/apps
tar -zxvf jdk-8u45-linux-x64.tar.gz -C /opt/apps
Configure the environment variables
vi /etc/profile
source /etc/profile
Check the version
java -version
2.2 Configure hadoop-env.sh
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/hadoop-env.sh
The main line to change:
# The java implementation to use.
export JAVA_HOME=/opt/apps/jdk1.8.0_45
hadoop-env.sh
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use.
export JAVA_HOME=/opt/apps/jdk1.8.0_45
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
###
# HDFS Mover specific parameters
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata
export HADOOP_SECURE_DN_PID_DIR=/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
2.3 Configure hdfs-site.xml
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/data</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/sname</value>
</property>
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/sname</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- WebHDFS (REST API) in Namenodes and Datanodes. -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
2.4 Configure core-site.xml
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://cdh01:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata</value>
</property>
<!-- Hue proxy-user settings -->
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
<!-- Hive 2.x proxy-user settings -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
2.5 Configure mapred-site.xml
mv mapred-site.xml.template mapred-site.xml
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>cdh01:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>cdh01:19888</value>
</property>
</configuration>
2.6 Configure yarn-site.xml
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>cdh01</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.scheduler.fair.preemption</name>
<value>true</value>
</property>
<property>
<name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
<value>1.0</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>cdh01:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>cdh01:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>cdh01:8032</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>cdh01:8088</value>
</property>
</configuration>
2.7 Configure the slaves file
vi slaves
cdh01
cdh02
cdh03
2.8 Clone the VM, change IP/hostname, and set up passwordless SSH
Run this on each of the three machines:
ssh-keygen -t rsa
ssh-copy-id cdh01
ssh-copy-id cdh02
ssh-copy-id cdh03
ssh cdh01
ssh 0.0.0.0
2.9 Format the NameNode (a hedged start-and-verify sketch follows the command)
hdfs namenode -format
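After formatting, a hedged start-and-verify sequence, assuming the sbin scripts are on the PATH set in /etc/profile above:
start-dfs.sh             # NameNode on cdh01, DataNodes on the slaves hosts
start-yarn.sh            # ResourceManager on cdh01, NodeManagers on the slaves hosts
jps                      # check the daemons on each machine
hdfs dfsadmin -report    # all three DataNodes should report in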
3. Install Hive
3.1 Unpack and install
tar -zxvf hive-1.1.0-cdh5.7.6.tar.gz -C /opt/apps
Configure the environment variables
vi /etc/profile
...
source /etc/profile
3.2 Configure hive-env.sh
mv hive-env.sh.template hive-env.sh
vi hive-env.sh
export JAVA_HOME=/opt/apps/jdk1.8.0_45
export HADOOP_HOME=/opt/apps/hadoop-2.6.0-cdh5.7.6
export HIVE_CONF_DIR=/opt/apps/hive-1.1.0-cdh5.7.6/conf
export HIVE_AUX_JARS_PATH=/opt/apps/hive-1.1.0-cdh5.7.6/lib
3.3 Install MySQL
3.3.0 Switch to the Aliyun yum mirror
Install wget
yum -y install wget
wget -O /etc/yum.repos.d/Centos-7.repo http://mirrors.aliyun.com/repo/Centos-7.repo
cd /etc/yum.repos.d
mv CentOS-Base.repo CentOS-Base.repo.bak
mv Centos-7.repo CentOS-Base.repo
yum clean all
yum repolist
3.3.1 Install from the MySQL release rpm via yum (recommended)
Upload the MySQL release rpm to the second machine (cdh02)
1.yum -y localinstall mysql-community-release-el6-5.noarch.rpm
2.yum -y install mysql-server
Installation complete.
3.service mysqld start
4.mysqladmin -uroot password '123456'
5.mysql -uroot -p123456
6. Grant remote access
GRANT ALL PRIVILEGES ON *.* TO root@'%' IDENTIFIED BY '123456' with grant option;
flush privileges;
GRANT ALL PRIVILEGES ON *.* TO root@'localhost' IDENTIFIED BY '123456' with grant option;
flush privileges;
7. Connect remotely with Navicat and create the hive database with the latin1 character set
3.3.2 Install MySQL 5.7 from the rpm bundle
1. Upload the bundle: mysql-5.7.28-1.el7.x86_64.rpm-bundle.tar
Note: the tar bundle contains several rpm packages and must be unpacked first.
2. Unpack it and locate the relevant rpm packages
tar -xvf mysql-5.7.28-1.el7.x86_64.rpm-bundle.tar -C ./
mysql-community-embedded-5.7.28-1.el7.x86_64.rpm
mysql-community-libs-compat-5.7.28-1.el7.x86_64.rpm
mysql-community-devel-5.7.28-1.el7.x86_64.rpm
mysql-community-embedded-compat-5.7.28-1.el7.x86_64.rpm
mysql-community-libs-5.7.28-1.el7.x86_64.rpm
mysql-community-test-5.7.28-1.el7.x86_64.rpm
mysql-community-common-5.7.28-1.el7.x86_64.rpm
mysql-community-embedded-devel-5.7.28-1.el7.x86_64.rpm
mysql-community-client-5.7.28-1.el7.x86_64.rpm
mysql-community-server-5.7.28-1.el7.x86_64.rpm
3. The rpm package that actually needs to be installed:
mysql-community-server-5.7.28-1.el7.x86_64.rpm
Steps:
1. Check whether mariadb is already installed on CentOS 7.7; if it is, remove it (it conflicts with MySQL)
rpm -qa | grep mariadb
mariadb-libs-5.5.64-1.el7.x86_64 <== force-remove this
rpm -e mariadb-libs-5.5.64-1.el7.x86_64 --nodeps
2. Install mysql-common (mysql-libs depends on it)
rpm -ivh mysql-community-common-5.7.28-1.el7.x86_64.rpm
3. Install mysql-libs (mysql-client depends on it)
rpm -ivh mysql-community-libs-5.7.28-1.el7.x86_64.rpm
4. Install mysql-client (mysql-server depends on it)
rpm -ivh mysql-community-client-5.7.28-1.el7.x86_64.rpm
5. Install the dependencies perl and net-tools
yum -y install perl net-tools
6. Install mysql-server (on Aliyun you may also need: yum install libaio)
rpm -ivh mysql-community-server-5.7.28-1.el7.x86_64.rpm
The MySQL server is now installed but the service is not running yet, so the next step is to start it.
7. Start the mysqld service and check it
systemctl start mysqld
systemctl status mysqld
8. Find the temporary root password (/var/log/mysqld.log is created when the service starts)
cat /var/log/mysqld.log | grep password
2020-04-08T08:15:04.180438Z 1 [Note] A temporary password is generated for root@localhost: dwtqdjaHn0;p
2020-04-08T08:16:02.523447Z 2 [Note] Access denied for user 'root'@'localhost' (using password: NO)
9. Log in to MySQL (the rpm install puts the mysql client on the PATH) with the copied temporary password
mysql -uroot -p
10. Lower the password policy to low (MySQL 5.7.28's default policy is strict, so relax it first)
mysql> set global validate_password_policy=low;
set global validate_password_length=6;
11. Change the password (with validate_password_length=6, at least 6 characters are now required)
mysql> alter user root@localhost identified by '123456';
12. Grant remote access
GRANT ALL PRIVILEGES ON *.* TO root@'%' IDENTIFIED BY '123456' with grant option;
flush privileges;
GRANT ALL PRIVILEGES ON *.* TO root@'localhost' IDENTIFIED BY '123456' with grant option;
flush privileges;
13. Create the databases with Navicat from the local machine
Extra: view the password policy settings
mysql> show variables like "%validate_password%";
3.4 Configure hive-site.xml
vi /opt/apps/hive-1.1.0-cdh5.7.6/conf/hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>192.168.10.101</value>
</property>
<property>
<name>hive.server2.long.polling.timeout</name>
<value>50000</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://cdh01:9083</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/root/warehouse</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://cdh02:3306/hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<!-- show the current database in the Hive prompt -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<!-- print column headers in query results -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- let Hive decide automatically whether to run in local mode -->
<property>
<name>hive.exec.mode.local.auto</name>
<value>true</value>
</property>
<!-- additions to hive-site.xml -->
<!-- disable metastore schema verification (add for Hive 2.x) -->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<!-- add for Hive 2.x -->
<property>
<name>datanucleus.metadata.validate</name>
<value>false</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
</configuration>
3.5 Upload the MySQL driver mysql-connector-java-5.1.46-bin.jar to $HIVE_HOME/lib
3.6 Test Hive (a hedged beeline check follows the commands)
hive --service metastore &
hive --service hiveserver2 &
hive
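To confirm hiveserver2 answers, a hedged check with beeline (shipped with Hive; the connection string assumes the hive-site.xml above):
beeline -u jdbc:hive2://cdh01:10000 -n root
# then run "show databases;" at the beeline prompt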
4. Install HBase
4.1 Install ZooKeeper
4.1.1 Unpack and install
tar -zxvf zookeeper-3.4.5-cdh5.7.6.tar.gz -C /opt/apps
vi /etc/profile
...
source /etc/profile
4.1.2 Create the myid file in the ZooKeeper root directory
echo "1" > /opt/apps/zookeeper-3.4.5-cdh5.7.6/myid
4.1.3 Configure zoo.cfg
mv zoo_sample.cfg zoo.cfg
vi zoo.cfg
...
# two settings matter here
dataDir=/opt/apps/zookeeper-3.4.5-cdh5.7.6   # where myid lives
# add the three server nodes
server.1=cdh01:2888:3888
server.2=cdh02:2888:3888
server.3=cdh03:2888:3888
4.1.4 Copy ZooKeeper to the other two machines and change their myid files
scp -r /opt/apps/zookeeper-3.4.5-cdh5.7.6 cdh02:/opt/apps/
scp -r /opt/apps/zookeeper-3.4.5-cdh5.7.6 cdh03:/opt/apps/
echo "2" > /opt/apps/zookeeper-3.4.5-cdh5.7.6/myid   # on cdh02
echo "3" > /opt/apps/zookeeper-3.4.5-cdh5.7.6/myid   # on cdh03
4.1.5 Sync the environment variables
scp -r /etc/profile cdh02:/etc/
scp -r /etc/profile cdh03:/etc/
source /etc/profile
4.1.6 Start ZooKeeper and make sure the quorum is fully up
zkServer.sh start
4.2 Install HBase
4.2.1 Unpack, install, and configure the environment variables
tar -zxvf hbase-1.2.0-cdh5.7.6.tar.gz -C /opt/apps
vi /etc/profile
...
source /etc/profile
4.3 Configure hbase-env.sh
vi /opt/apps/hbase-1.2.0-cdh5.7.6/conf/hbase-env.sh
...
# The java implementation to use. Java 1.7+ required.
export JAVA_HOME=/opt/apps/jdk1.8.0_45
...
# Tell HBase whether it should manage it's own instance of Zookeeper or not.
export HBASE_MANAGES_ZK=false
4.4 Configure hbase-site.xml
vi /opt/apps/hbase-1.2.0-cdh5.7.6/conf/hbase-site.xml
<configuration>
<!-- run HBase in distributed mode -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- HDFS path where HBase stores its data (port 8020 or 9000) -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://cdh01:9000/hbase</value>
</property>
<!-- ZooKeeper quorum, comma-separated -->
<property>
<name>hbase.zookeeper.quorum</name>
<value>cdh01,cdh02,cdh03</value>
</property>
</configuration>
4.5 Configure the regionservers and backup-masters files
vi regionservers
cdh01
cdh02
cdh03
Configure a backup HMaster: create the backup-masters file
cd $HBASE_HOME/conf/
echo "cdh02">> backup-masters
4.6 Distribute HBase and the environment variables, then source
scp -r /opt/apps/hbase-1.2.0-cdh5.7.6/ cdh02:/opt/apps/
scp -r /opt/apps/hbase-1.2.0-cdh5.7.6/ cdh03:/opt/apps/
scp -r /etc/profile cdh02:/etc/
scp -r /etc/profile cdh03:/etc/
source /etc/profile
4.7 Start HBase and test (a hedged smoke test follows)
Make sure ZooKeeper is running on all three machines before starting HBase
zkServer.sh start
start-dfs.sh
start-hbase.sh
hbase shell
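A hedged smoke test piped into hbase shell (the table name t_smoke is just an example):
echo -e "create 't_smoke','cf'\nput 't_smoke','r1','cf:c1','v1'\nscan 't_smoke'\ndisable 't_smoke'\ndrop 't_smoke'" | hbase shell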
5. Install Flume
5.1 Unpack and configure the environment variables
tar -zxvf flume-1.6.0-cdh5.7.6.tar.gz -C /opt/apps
vi /etc/profile
.....
source /etc/profile
5.2 Configure flume-env.sh
cd /opt/apps/flume-1.6.0-cdh5.7.6/conf
cp flume-env.sh.template flume-env.sh
vi flume-env.sh
...
# Enviroment variables can be set here.
export JAVA_HOME=/opt/apps/jdk1.8.0_45
5.3 Verify the version
flume-ng version
6. Install Sqoop
6.1 Unpack and configure the environment variables
tar -zxvf sqoop-1.4.6-cdh5.7.6.tar.gz -C /opt/apps/
vi /etc/profile
...
source /etc/profile
6.2 Configure sqoop-env.sh
cd /opt/apps/sqoop-1.4.6-cdh5.7.6/conf
mv sqoop-env-template.sh sqoop-env.sh
vi sqoop-env.sh
...
#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/opt/apps/hadoop-2.6.0-cdh5.7.6
#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/opt/apps/hadoop-2.6.0-cdh5.7.6
#set the path to where bin/hbase is available
export HBASE_HOME=/opt/apps/hbase-1.2.0-cdh5.7.6
#Set the path to where bin/hive is available
export HIVE_HOME=/opt/apps/hive-1.1.0-cdh5.7.6
#Set the path for where zookeper config dir is
export ZOOCFGDIR=/opt/apps/zookeeper-3.4.5-cdh5.7.6/conf
6.3 Copy the MySQL JDBC driver into Sqoop's lib directory
cp /opt/apps/hive-1.1.0-cdh5.7.6/lib/mysql-connector-java-5.1.46-bin.jar /opt/apps/sqoop-1.4.6-cdh5.7.6/lib/
6.4 Verify the version (a hedged connectivity check follows the output)
sqoop version
20/08/17 20:06:00 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.7.6
Sqoop 1.4.6-cdh5.7.6
git commit id
Compiled by jenkins on Tue Feb 21 15:04:07 PST 2017
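A hedged connectivity check against the MySQL instance on cdh02 (uses the driver copied in 6.3 and the credentials from the MySQL section):
sqoop list-databases --connect jdbc:mysql://cdh02:3306 --username root --password 123456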
7. Install Nginx
7.1 Install
## 1. Unpack
# tar -zxvf nginx-1.8.0.tar.gz -C /opt/apps/
## 2. Install the C toolchain
yum -y install gcc-c++
## 3. PCRE libraries
yum install -y pcre pcre-devel
## 4. zlib (compression support)
yum install -y zlib zlib-devel
## 5. OpenSSL (HTTPS support)
yum install -y openssl openssl-devel
## 6. Run configure, specifying the install path
mkdir /opt/apps/nginx-1.8.0/tmp
./configure \
--prefix=/opt/apps/nginx-1.8.0 \
--pid-path=/opt/apps/nginx-1.8.0/tmp/nginx.pid \
--lock-path=/opt/apps/nginx-1.8.0/tmp/nginx.lock \
--error-log-path=/opt/apps/nginx-1.8.0/tmp/error.log \
--http-log-path=/opt/apps/nginx-1.8.0/tmp/access.log \
--with-http_gzip_static_module \
--http-client-body-temp-path=/opt/apps/nginx-1.8.0/tmp/client \
--http-proxy-temp-path=/opt/apps/nginx-1.8.0/tmp/proxy \
--http-fastcgi-temp-path=/opt/apps/nginx-1.8.0/tmp//fastcgi \
--http-uwsgi-temp-path=/opt/apps/nginx-1.8.0/tmp/uwsgi \
--http-scgi-temp-path=/opt/apps/nginx-1.8.0/tmp/scgi
## 7. Build and install
make
make install
## 8. Test from the sbin directory
./nginx
./nginx -s stop
./nginx -s reload
7.2 Build a simple log-collection pipeline (*)
1. Set up the web logging platform
## 1. Create a web project in IDEA
## 2. Copy the provided resources into the new project
## 3. Run Tomcat and check that the two submission URLs respond
## 4. In analytics.js, set serverUrl: "http://192.168.10.101/index.html"
## 5. Edit nginx.conf
server {
listen 80;
server_name 192.168.10.101;
location / {
root html;
index index.html index.htm;
}
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root html;
}
}
[root@cdh01 conf]#../sbin/nginx -s reload
## 6. You can now watch the collected requests in nginx's tmp/access.log
2. Customize the Nginx log format in nginx.conf
## ^A below is entered with ctrl+v then ctrl+a
http {
include mime.types;
default_type application/octet-stream;
log_format my_log '$remote_addr^A$msec^A$http_host^A$request_uri';   ### custom log format
sendfile on;
keepalive_timeout 65;
server {
listen 80;
server_name 192.168.10.101;
access_log tmp/access.log my_log;   ### write the access log with the custom format
location / {
root html;
index index.html index.htm;
}
}
3. Configure Flume
##1. Copy the Flume installation from cdh02 to cdh01
[root@cdh02 apps]# scp -r flume-1.6.0-cdh5.7.6/ cdh01:/opt/apps/
##2. Create the agent config in cdh01's flume conf directory
[root@cdh01 conf]# vi web-log-js.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1
#source configuration
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /opt/apps/nginx-1.8.0/tmp/access.log
#channel configuration
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# sink configuration
a1.sinks.k1.type = avro
a1.sinks.k1.hostname =cdh02
a1.sinks.k1.port = 10086
a1.sinks.k1.batch-size = 100
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
##3. Create the following Flume config in cdh02's flume conf directory
a1.sources = r1
a1.sinks = k1
a1.channels = c1
#source configuration
a1.sources.r1.type = avro
a1.sources.r1.bind = cdh02
a1.sources.r1.port = 10086
#channel configuration
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# sink configuration
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://cdh01:9000/hzbigdata2002_logs/%Y-%m-%d/
a1.sinks.k1.hdfs.filePrefix = qphone_log
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.batchSize = 100
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.writeFormat = Text
## sink file-roll settings
a1.sinks.k1.hdfs.rollInterval = 60
a1.sinks.k1.hdfs.rollSize = 512000
a1.sinks.k1.hdfs.rollCount = 1000000
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
## use the local timestamp for the sink
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
##4. Start the Flume agent on cdh02 in the background first, then the one on cdh01
[root@cdh02 conf]# nohup flume-ng agent -c /opt/apps/flume-1.6.0-cdh5.7.6/conf/ -f /opt/apps/flume-1.6.0-cdh5.7.6/conf/web-log-js.conf -n a1 1>/dev/null 2>&1 &
[root@cdh01 conf]# nohup flume-ng agent -c /opt/apps/flume-1.6.0-cdh5.7.6/conf/ -f /opt/apps/flume-1.6.0-cdh5.7.6/conf/web-log-js.conf -n a1 1>/dev/null 2>&1 &
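To confirm events are landing in HDFS, a hedged check against the path configured in the cdh02 agent above:
hdfs dfs -ls /hzbigdata2002_logs/$(date +%Y-%m-%d)/
# files with the qphone_log prefix should appear once the 60-second roll interval has passed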
8. Install Hue
8.1 Install Maven
## unpack and configure the environment variables
tar -zxvf apache-maven-3.3.9-bin.tar.gz -C /opt/apps/
mv apache-maven-3.3.9/ maven-3.3.9
vi /etc/profile
...
source /etc/profile
## check the version
mvn -v
Apache Maven 3.3.9 (bb52d8502b132ec0a5a3f4c09453c07478323dc5; 2015-11-11T00:41:47+08:00)
Maven home: /opt/apps/maven-3.3.9
Java version: 1.8.0_45, vendor: Oracle Corporation
Java home: /opt/apps/jdk1.8.0_45/jre
Default locale: zh_CN, platform encoding: UTF-8
OS name: "linux", version: "3.10.0-1062.el7.x86_64", arch: "amd64", family: "unix"
8.2 Install Ant
tar -zxvf apache-ant-1.8.1-bin.tar.gz -C /opt/apps/
mv apache-ant-1.8.1/ ant-1.8.1
vi /etc/profile
...
source /etc/profile
## check the version
ant -version
Apache Ant version 1.8.1 compiled on April 30 2010
8.3 Install Hue
1. Create a regular user, because Hue cannot be installed or started as root
useradd hue
passwd hue
Changing password for user hue.
New password:
BAD PASSWORD: the password is shorter than 8 characters
Retype new password:
passwd: all authentication tokens updated successfully.
2. Unpack
tar -zxvf hue-3.9.0-cdh5.7.6.tar.gz -C /opt/apps
3. Install the build dependencies
yum install asciidoc cyrus-sasl-devel cyrus-sasl-gssapi cyrus-sasl-plain gcc gcc-c++ krb5-devel libffi-devel libtidy libxml2-devel libxslt-devel make mysql mysql-devel openldap-devel python-devel sqlite-devel openssl-devel gmp-devel -y
4. Build from the Hue root directory
[root@cdh01 hue-3.9.0-cdh5.7.6]# make apps
5. Edit hue.ini
[root@cdh01 hue-3.9.0-cdh5.7.6]# vi desktop/conf/hue.ini
[desktop]
# Set this to a random string, the longer the better.
# This is used for secure hashing in the session store.
secret_key=jFE93j;2[290-eiw.KEiwN2s3['d;/.q[eIW^y#e=+Iei*@Mn<qW5o
# Webserver listens on this address and port
http_host=cdh01
http_port=8888
# Time zone name
time_zone=Asia/Shanghai
6. Try starting Hue as root
[root@cdh01 hue-3.9.0-cdh5.7.6]# ./build/env/bin/supervisor
It fails, as expected.
7. Give the hue user ownership and sudo rights, then start Hue as hue
[root@cdh01 apps]# chown -R hue:hue hue-3.9.0-cdh5.7.6/
[root@cdh01 apps]# vi /etc/sudoers
## Allow root to run any commands anywhere
root ALL=(ALL) ALL
hue ALL=(ALL) ALL
[root@cdh01 apps]# su hue
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ ./build/env/bin/supervisor
8.4 Hue integrations
8.4.1 Integrate with MySQL
su hue
cd /opt/apps/hue-3.9.0-cdh5.7.6
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ vi desktop/conf/hue.ini
# mysql, oracle, or postgresql configuration.
[[[mysql]]]
# Name to show in the UI.
nice_name="My SQL DB"
# For MySQL and PostgreSQL, name is the name of the database.
# For Oracle, Name is instance of the Oracle server. For express edition
# this is 'xe' by default.
## name=mysqldb
# Database backend to use. This can be:
# 1. mysql
# 2. postgresql
# 3. oracle
engine=mysql
# IP or hostname of the database to connect to.
host=192.168.10.102
# Port the database server is listening to. Defaults are:
# 1. MySQL: 3306
# 2. PostgreSQL: 5432
# 3. Oracle Express Edition: 1521
port=3306
# Username to authenticate with when connecting to the database.
user=root
# Password matching the username to authenticate with when
# connecting to the database.
password=123456
# Database options to send to the server when connecting.
# https://docs.djangoproject.com/en/1.4/ref/databases/
## options={}
Restart Hue
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ ./build/env/bin/supervisor
8.4.2 Integrate with HDFS
## start the MapReduce history server with: mr-jobhistory-daemon.sh start historyserver
1. Edit the Hue configuration
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ vi desktop/conf/hue.ini
[hadoop]
# Configuration for HDFS NameNode
# ------------------------------------------------------------------------
[[hdfs_clusters]]
# HA support by using HttpFs
[[[default]]]
# Enter the filesystem uri
fs_defaultfs=hdfs://cdh01:9000
# NameNode logical name.
## logical_name=
# Use WebHdfs/HttpFs as the communication mechanism.
# Domain should be the NameNode or HttpFs host.
# Default port is 14000 for HttpFs.
webhdfs_url=http://cdh01:50070/webhdfs/v1
# Change this if your HDFS cluster is Kerberos-secured
## security_enabled=false
# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
# have to be verified against certificate authority
## ssl_cert_ca_verify=True
# Directory of the Hadoop configuration
hadoop_conf_dir=/opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop
hadoop_hdfs_home=/opt/apps/hadoop-2.6.0-cdh5.7.6
hadoop_bin=/opt/apps/hadoop-2.6.0-cdh5.7.6/bin
# Configuration for YARN (MR2)
# ------------------------------------------------------------------------
2. Configure Hadoop's core-site.xml
sudo vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://cdh01:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata</value>
</property>
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
</configuration>
3. Configure Hadoop's hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/data</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/sname</value>
</property>
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/sname</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- WebHDFS (REST API) in Namenodes and Datanodes. -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
4. After configuring, sync the files to all three machines
[root@cdh01 hadoop]# scp -r /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/ cdh02:/opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/
Tip: if Hue reports "Filesystem root '/' should be owned by 'hdfs'"
Fix:
edit the following Python variable
vi desktop/libs/hadoop/src/hadoop/fs/webhdfs.py
and change its value to:
DEFAULT_HDFS_SUPERUSER = 'root'   # the HDFS superuser Hue runs file operations as
8.4.3 Integrate with YARN
1. Edit hue.ini
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ vi desktop/conf/hue.ini
[[yarn_clusters]]
[[[default]]]
# Enter the host on which you are running the ResourceManager
resourcemanager_host=cdh01
# The port where the ResourceManager IPC listens on
resourcemanager_port=8032
# Whether to submit jobs to this cluster
submit_to=True
# Resource Manager logical name (required for HA)
## logical_name=
# Change this if your YARN cluster is Kerberos-secured
## security_enabled=false
# URL of the ResourceManager API
resourcemanager_api_url=http://cdh01:8088
# URL of the ProxyServer API
proxy_api_url=http://cdh01:8088
# URL of the HistoryServer API
history_server_api_url=http://cdh01:19888
# URL of the Spark History Server
## spark_history_server_url=http://localhost:18088
# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
# have to be verified against certificate authority
## ssl_cert_ca_verify=True
2. Configure yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>cdh01</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>cdh01:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>cdh01:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>cdh01:8032</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>cdh01:8088</value>
</property>
</configuration>
3. After configuring, sync yarn-site.xml to all three machines
scp /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/yarn-site.xml cdh02:/opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/
8.4.4 Integrate with Hive
1. Edit hue.ini
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ vi desktop/conf/hue.ini
[beeswax]
# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
hive_server_host=192.168.10.101
# Port where HiveServer2 Thrift server runs on.
hive_server_port=10000
# Hive configuration directory, where hive-site.xml is located
hive_conf_dir=/opt/apps/hive-1.1.0-cdh5.7.6/conf
# Timeout in seconds for thrift calls to Hive service
## server_conn_timeout=120
# Choose whether to use the old GetLog() thrift call from before Hive 0.14 to retrieve the logs.
# If false, use the FetchResults() thrift call from Hive 1.0 or more instead.
## use_get_log_api=false
# Limit the number of partitions that can be listed.
## list_partitions_limit=10000
# The maximum number of partitions that will be included in the SELECT * LIMIT sample query for partitioned tables.
## query_partitions_limit=10
# A limit to the number of rows that can be downloaded from a query.
# A value of -1 means there will be no limit.
# A maximum of 30,000 is applied to XLS downloads.
## download_row_limit=1000000
2. Configure hive-site.xml
vi /opt/apps/hive-1.1.0-cdh5.7.6/conf/hive-site.xml
<configuration>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>192.168.10.101</value>
</property>
<property>
<name>hive.server2.long.polling.timeout</name>
<value>50000</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://cdh01:9083</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/root/warehouse</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://cdh02:3306/hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<!-- show the current database in the Hive prompt -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<!-- let Hive decide automatically whether to run in local mode -->
<property>
<name>hive.exec.mode.local.auto</name>
<value>true</value>
</property>
</configuration>
3. Start the Hive services
hive --service metastore &
hive --service hiveserver2 &
8.4.5 Integrate with ZooKeeper
Edit hue.ini
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ vi desktop/conf/hue.ini
[zookeeper]
[[clusters]]
[[[default]]]
# Zookeeper ensemble. Comma separated list of Host/Port.
# e.g. localhost:2181,localhost:2182,localhost:2183
host_ports=cdh01:2181,cdh02:2181,cdh03:2181
# The URL of the REST contrib service (required for znode browsing).
## rest_url=http://localhost:9998
# Name of Kerberos principal when using security.
## principal_name=zookeeper
8.4.6 Integrate with HBase
1. Start the ZooKeeper cluster
zkServer.sh start
2. Edit hue.ini
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ vi desktop/conf/hue.ini
[hbase]
# Comma-separated list of HBase Thrift servers for clusters in the format of '(name|host:port)'.
# Use full hostname with security.
# If using Kerberos we assume GSSAPI SASL, not PLAIN.
hbase_clusters=(Cluster|192.168.10.101:9090)
# HBase configuration directory, where hbase-site.xml is located.
hbase_conf_dir=/opt/apps/hbase-1.2.0-cdh5.7.6/conf
# Hard limit of rows or columns per row fetched before truncating.
## truncate_limit = 500
# 'buffered' is the default of the HBase Thrift Server and supports security.
# 'framed' can be used to chunk up responses,
# which is useful when used in conjunction with the nonblocking server in Thrift.
## thrift_transport=buffered
3. Start HBase and its Thrift server
start-hbase.sh
hbase-daemon.sh start thrift
4. Start Hue
[hue@cdh01 hue-3.9.0-cdh5.7.6]$ ./build/env/bin/supervisor
9. Job Scheduling
9.0 Install Oozie
9.0.1 Edit the config files under the Hadoop etc/hadoop/ directory
##1. core-site.xml
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
##2. mapred-site.xml: configure the history server
<property>
<name>mapreduce.jobhistory.address</name>
<value>cdh01:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>cdh01:19888</value>
</property>
##3.yarn-site.xml
<property>
<name>yarn.log.server.url</name>
<value>http://cdh01:19888/jobhistory/logs/</value>
</property>
## restart HDFS after these changes
9.0.2 Install Oozie
9.0.2.1 Unpack, install, and configure
##1. Unpack and set the environment variables
tar -zxvf /opt/software/oozie-4.1.0-cdh5.7.6.tar.gz -C /opt/apps/
vi /etc/profile
export OOZIE_HOME=/opt/apps/oozie-4.1.0-cdh5.7.6
export PATH=$PATH:$OOZIE_HOME/bin
source /etc/profile
##2. In the Oozie installation root, unpack the bundled hadooplibs tarball
tar -zxvf oozie-hadooplibs-4.1.0-cdh5.7.6.tar.gz -C ../
#tip: after unpacking, a hadooplibs directory appears under the Oozie root
##3. Create a libext directory in the Oozie root
mkdir libext
##4. Copy in the dependency jars
##4.1 Copy the jars from the hadooplibs directory into libext
cp -ra hadooplibs/hadooplib-2.6.0-cdh5.7.6.oozie-4.1.0-cdh5.7.6/* libext/
##4.2 Copy the MySQL driver jar into libext
cp /opt/software/mysql-connector-java-5*-bin.jar libext/
##4.3 Copy ext-2.2.zip into libext
cp /opt/software/ext-2.2.zip libext/
9.0.2.2 Configure conf/oozie-site.xml
<property>
<name>oozie.service.HadoopAccessorService.hadoop.configurations</name>
<value>*=/opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/</value>
</property>
<property>
<name>oozie.service.JPAService.create.db.schema</name>
<value>true</value>
</property>
<property>
<name>oozie.service.JPAService.jdbc.driver</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>oozie.service.JPAService.jdbc.url</name>
<value>jdbc:mysql://cdh02:3306/oozie</value>
</property>
<property>
<name>oozie.service.JPAService.jdbc.username</name>
<value>root</value>
</property>
<property>
<name>oozie.service.JPAService.jdbc.password</name>
<value>123456</value>
</property>
<property>
<name>oozie.processing.timezone</name>
<value>GMT+0800</value>
</property>
#tip: from Windows, connect to cdh02 and create an oozie database with the utf8 character set
9.0.2.3 Unpack the ext-2.2.zip file
## install unzip to extract ext-2.2.zip
yum -y install unzip
## unzip ext-2.2.zip inside the libext directory
cd /opt/apps/oozie-4.1.0-cdh5.7.6/libext
unzip /opt/apps/oozie-4.1.0-cdh5.7.6/libext/ext-2.2.zip
9.0.3 Initialize Oozie
##1. From the Oozie root, upload the bundled yarn sharelib tarball to HDFS
cd /opt/apps/oozie-4.1.0-cdh5.7.6
./bin/oozie-setup.sh sharelib create -fs hdfs://cdh01:9000 -locallib oozie-sharelib-4.1.0-cdh5.7.6-yarn.tar.gz
#tip: this can take a while; when it succeeds, check on port 50070 (or with the CLI below) that /user/root/share was created
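A hedged CLI alternative to checking on port 50070:
hdfs dfs -ls /user/root/share/lib
# a lib_<timestamp> directory full of jars should be listed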
##2. Create the Oozie database schema (oozie.sql)
./bin/ooziedb.sh create -sqlfile oozie.sql -run
##3. Build the Oozie war
./bin/oozie-setup.sh prepare-war
9.0.4 Start and test
oozied.sh run/start/stop   # foreground / background / stop
Web UI: cdh01:11000 (a hedged CLI status check follows)
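A hedged CLI status check once the server is running (the oozie client is in $OOZIE_HOME/bin, already on the PATH):
oozie admin -oozie http://cdh01:11000/oozie -status
# expected output: System mode: NORMAL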
9.1 Install Azkaban
9.1 Install the solo server
## 1. Unpack
tar -zxvf azkaban-solo-server-0.1.0-SNAPSHOT.tar.gz -C /opt/apps/
## 2. Configure user permissions
[root@cdh01 conf]# vi azkaban-users.xml
<azkaban-users>
<user groups="azkaban" password="azkaban" roles="admin" username="azkaban"/>
<user password="metrics" roles="metrics" username="metrics"/>
<user password="admin" roles="metrics,admin" username="admin"/> ## added
<role name="admin" permissions="ADMIN"/>
<role name="metrics" permissions="METRICS"/>
</azkaban-users>
## 3. Start: gotcha, it must be started from the Azkaban root directory, otherwise it will not come up
[root@cdh01 azkaban-solo-server-0.1.0]# bin/start-solo.sh
[root@cdh01 azkaban-solo-server-0.1.0]# jps
## 4. Test
http://192.168.10.101:8081/
9.2 Fully distributed installation
cdh01 : web-server
cdh02 : exec-server
cdh03 : exec-server
9.2.1 Install the web-server on cdh01
9.2.1.1 Unpack, then create the azkaban database and tables
1. Unpack
tar -zxvf azkaban-web-server-0.1.0-SNAPSHOT.tar.gz -C /opt/apps/
mv azkaban-web-server-0.1.0-SNAPSHOT/ azkaban-web-server-0.1.0
2. Create the database Azkaban needs
CREATE DATABASE IF NOT EXISTS azkaban;
GRANT SELECT,INSERT,UPDATE,DELETE on azkaban.* to root@"%" identified by "123456";
flush privileges;
show grants for root@'%';
3. Create the tables
## 1. Copy the azkaban-db tar from cdh01 to cdh02
[root@cdh01 software]# scp azkaban-db-0.1.0-SNAPSHOT.tar.gz cdh02:/opt/software/
azkaban-db-0.1.0-SNAPSHOT.tar.gz
## 2. Unpack the db tar
[root@cdh02 software]# tar -zxvf azkaban-db-0.1.0-SNAPSHOT.tar.gz
## 3. Run the SQL script in MySQL to create the tables
mysql> use azkaban;
Database changed
mysql> source /opt/software/azkaban-db-0.1.0-SNAPSHOT/create-all-sql-0.1.0-SNAPSHOT.sql
4. Adjust the MySQL configuration
[root@cdh01 azkaban]# vi /etc/my.cnf
add under the [mysqld] section:
max_allowed_packet=1024M
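The new max_allowed_packet value only takes effect after mysqld restarts; a hedged check on the MySQL host:
systemctl restart mysqld
mysql -uroot -p123456 -e "show variables like 'max_allowed_packet';"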
9.2.1.2 Install and configure
1. Copy the MySQL driver into the web-server
## create a directory under the web-server root to hold the driver
[root@cdh01 azkaban-web-server-0.1.0]# mkdir ./extlib
[root@cdh01 azkaban-web-server-0.1.0]# cp /opt/apps/hive-1.1.0-cdh5.7.6/lib/mysql-connector-java-5.1.47-bin.jar ./extlib/
2. Generate an SSL certificate (keystore)
[root@cdh01 azkaban-web-server-0.1.0]# keytool -keystore keystore -alias jetty -genkey -keyalg RSA
Enter keystore password:
Re-enter new password:
What is your first and last name?
[Unknown]:
What is the name of your organizational unit?
[Unknown]:
What is the name of your organization?
[Unknown]:
What is the name of your City or Locality?
[Unknown]:
What is the name of your State or Province?
[Unknown]:
What is the two-letter country code for this unit?
[Unknown]: CN
Is CN=Unknown, OU=Unknown, O=Unknown, L=Unknown, ST=Unknown, C=CN correct?
[no]: y
Enter key password for <jetty>
(RETURN if same as keystore password):
3. Edit azkaban.properties
# Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=/opt/apps/azkaban-web-server-0.1.0/web
default.timezone.id=Asia/Shanghai
# Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=/opt/apps/azkaban-web-server-0.1.0/conf/azkaban-users.xml
# Loader for projects
executor.global.properties=/opt/apps/azkaban-exec-server-0.1.0/conf/global.properties
azkaban.project.dir=projects
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.use.ssl=false
jetty.maxThreads=25
jetty.ssl.port=8443
jetty.port=8081
jetty.keystore=keystore
jetty.password=123456
jetty.keypassword=123456
jetty.truststore=keystore
jetty.trustpassword=123456
# Azkaban Executor settings
# mail settings
mail.sender=
mail.host=
# User facing web server configurations used to construct the user facing server URLs. They are useful when there is a reverse proxy between Azkaban web servers and users.
# enduser -> myazkabanhost:443 -> proxy -> localhost:8081
# when this parameters set then these parameters are used to generate email links.
# if these parameters are not set then jetty.hostname, and jetty.port(if ssl configured jetty.ssl.port) are used.
# azkaban.webserver.external_hostname=myazkabanhost.com
# azkaban.webserver.external_ssl_port=443
# azkaban.webserver.external_port=8081
job.failure.email=
job.success.email=
lockdown.create.projects=false
cache.directory=cache
# JMX stats
jetty.connector.stats=true
executor.connector.stats=true
# Azkaban mysql settings by default. Users should configure their own username and password.
database.type=mysql
mysql.port=3306
mysql.host=cdh02
mysql.database=azkaban
mysql.user=root
mysql.password=123456
mysql.numconnections=100
#Multiple Executor
azkaban.use.multiple.executors=true
#azkaban.executorselector.filters=StaticRemainingFlowSize,MinimumFreeMemory,CpuStatus
azkaban.executorselector.filters=StaticRemainingFlowSize,CpuStatus
azkaban.executorselector.comparator.NumberOfAssignedFlowComparator=1
azkaban.executorselector.comparator.Memory=1
azkaban.executorselector.comparator.LastDispatched=1
azkaban.executorselector.comparator.CpuUsage=1
4. Configure azkaban-users.xml
<azkaban-users>
<user groups="azkaban" password="azkaban" roles="admin" username="azkaban"/>
<user password="metrics" roles="metrics" username="metrics"/>
<user password="admin" roles="metrics,admin" username="admin"/> ### newly added
<role name="admin" permissions="ADMIN"/>
<role name="metrics" permissions="METRICS"/>
</azkaban-users>
9.2.2 Install the exec-server on cdh02 and cdh03
1. Copy the exec tar from cdh01 to cdh02 and cdh03
[root@cdh01 software]# scp azkaban-exec-server-0.1.0-SNAPSHOT.tar.gz cdh02:/opt/software/
[root@cdh01 software]# scp azkaban-exec-server-0.1.0-SNAPSHOT.tar.gz cdh03:/opt/software/
[root@cdh02 software]# tar -zxvf azkaban-exec-server-0.1.0-SNAPSHOT.tar.gz -C /opt/apps/
[root@cdh02 apps]# mv azkaban-exec-server-0.1.0-SNAPSHOT/ azkaban-exec-server-0.1.0
2. Create an extlib directory under the exec root for the MySQL driver
## create the driver directory under the exec-server root
[root@cdh02 azkaban-web-server-0.1.0]# mkdir ./extlib
[root@cdh01 software]# scp /opt/apps/azkaban-web-server-0.1.0/extlib/mysql-connector-java-5.1.46-bin.jar cdh02:/opt/apps/azkaban-exec-server-0.1
[root@cdh03 azkaban-web-server-0.1.0]# mkdir ./extlib
[root@cdh01 software]# scp /opt/apps/azkaban-web-server-0.1.0/extlib/mysql-connector-java-5.1.46-bin.jar cdh03:/opt/apps/azkaban-exec-server-0.1
3. Edit azkaban.properties
# Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=/opt/apps/azkaban-web-server-0.1.0/web
default.timezone.id=Asia/Shanghai
# Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=/opt/apps/azkaban-web-server-0.1.0/conf/azkaban-users.xml
# Loader for projects
executor.global.properties=/opt/apps/azkaban-exec-server-0.1.0/conf/global.properties
azkaban.project.dir=projects
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.use.ssl=false
jetty.maxThreads=25
jetty.port=8081
# Where the Azkaban web server is located
azkaban.webserver.url=http://cdh01:8081
# mail settings
mail.sender=
mail.host=
# User facing web server configurations used to construct the user facing server URLs. They are useful when there is a reverse proxy between Azkaban web servers and users.
# enduser -> myazkabanhost:443 -> proxy -> localhost:8081
# when this parameters set then these parameters are used to generate email links.
# if these parameters are not set then jetty.hostname, and jetty.port(if ssl configured jetty.ssl.port) are used.
# azkaban.webserver.external_hostname=myazkabanhost.com
# azkaban.webserver.external_ssl_port=443
# azkaban.webserver.external_port=8081
job.failure.email=
job.success.email=
lockdown.create.projects=false
cache.directory=cache
# JMX stats
jetty.connector.stats=true
executor.connector.stats=true
# Azkaban plugin settings
azkaban.jobtype.plugin.dir=/opt/apps/azkaban-exec-server-0.1.0/plugins/jobtypes/
# Azkaban mysql settings by default. Users should configure their own username and password.
database.type=mysql
mysql.port=3306
mysql.host=cdh02
mysql.database=azkaban
mysql.user=root
mysql.password=123456
mysql.numconnections=100
# Azkaban Executor settings
executor.port=12321
executor.maxThreads=50
executor.flow.threads=30
4. Insert the executor nodes into the azkaban database
[root@cdh01 ~]# mysql -uroot -p123456
mysql> use azkaban;
mysql> insert into executors (host,port,active) values ('cdh02','12321',1);
mysql> insert into executors (host,port,active) values ('cdh03','12321',1);
tip:
Every time an executor service stops, its row in the executors table is deactivated (active goes from 1 to 0). Even after you restart the exec service, you must re-activate it manually by setting the value back to 1; a hedged SQL one-liner follows.
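A hedged one-liner to re-activate both executors (table and columns as created by the Azkaban DDL used above):
mysql -uroot -p123456 -e "UPDATE azkaban.executors SET active=1 WHERE host IN ('cdh02','cdh03');"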
Finally, copy the exec directory from cdh02 to cdh03
[root@cdh02 apps]# scp -r azkaban-exec-server-0.1.0/ cdh03:/opt/apps/
9.3 Start and test (set active 0 -> 1 after starting)
Azkaban must be started in order: the executors first, then the web server; otherwise the web server fails because it cannot find any executor.
1. start-exec.sh (shutdown-exec.sh stops it)
[root@cdh02 bin]# /opt/apps/azkaban-exec-server-0.1.0/bin/start-exec.sh
[root@cdh03 bin]# /opt/apps/azkaban-exec-server-0.1.0/bin/start-exec.sh
2. Set active from 0 to 1 in the executors table of the azkaban database
3. start-web.sh (shutdown-web.sh stops it)
[root@cdh01 bin]# /opt/apps/azkaban-web-server-0.1.0/bin/start-web.sh
9.4 Notes and pitfalls
1. Azkaban jobs stuck in Preparing
Fix:
Edit the web-server's conf/azkaban.properties.
# executor host filter settings: remove MinimumFreeMemory
# the MinimumFreeMemory filter checks that an executor host has more than 6 GB of free memory; below that, the web-server will not dispatch jobs to it
azkaban.executorselector.filters=StaticRemainingFlowSize,CpuStatus
2. When running a job, the Azkaban web log reports: Free memory amount minus Xmx (2836204 - 0 kb) is less than low mem threshold (3145728 kb), memory request declined
Fix:
[root@cdh02 executor]# vi ./plugins/jobtypes/commonprivate.properties
# set execute-as-user
execute.as.user=false
memCheck.enabled=false   # add this to disable the executor memory check; otherwise it complains about less than 3 GB free
10. Install Spark
10.1 Install the JDK and Hadoop
10.2 Install Scala
tar -zxvf scala-2.11.8.tgz -C /opt/apps/
vi /etc/profile
#environment
export JAVA_HOME=/opt/apps/jdk1.8.0_45
export HADOOP_HOME=/opt/apps/hadoop-2.6.0-cdh5.7.6
export SCALA_HOME=/opt/apps/scala-2.11.8
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SCALA_HOME/bin
scala -version
10.3 Install Spark
//1. Unpack and configure the environment variables
tar -zxvf spark-2.2.0-bin-hadoop2.7.tgz -C /opt/apps/
mv spark-2.2.0-bin-hadoop2.7/ spark-2.2.0
vi /etc/profile
#environment
export JAVA_HOME=/opt/apps/jdk1.8.0_45
export HADOOP_HOME=/opt/apps/hadoop-2.6.0-cdh5.7.6
export SCALA_HOME=/opt/apps/scala-2.11.8
export SPARK_HOME=/opt/apps/spark-2.2.0
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SCALA_HOME/bin
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
//2. Edit the Spark configuration
mv spark-env.sh.template spark-env.sh
mv slaves.template slaves
//2.1 Worker hosts
[root@cdh01 conf]# vi slaves
# A Spark Worker will be started on each of the machines listed below.
cdh01
cdh02
cdh03
//2.2 Runtime environment
[root@cdh01 conf]# vi spark-env.sh
export JAVA_HOME=/opt/apps/jdk1.8.0_45
export SCALA_HOME=/opt/apps/scala-2.11.8
export SPARK_MASTER_IP=cdh01
export SPARK_MASTER_PORT=7077 ## RPC port, analogous to HDFS's 9000, not the web UI port
export SPARK_WORKER_CORES=2 ## CPU cores per worker
export SPARK_WORKER_INSTANCES=1 ## worker instances per machine
export SPARK_WORKER_MEMORY=1g ## memory per worker
export HADOOP_CONF_DIR=/opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop
//3. Sync to the other two nodes
//3.1 Sync Scala
scp -r /opt/apps/scala-2.11.8/ cdh02:/opt/apps/
scp -r /opt/apps/scala-2.11.8/ cdh03:/opt/apps/
//3.2 Sync Spark
scp -r /opt/apps/spark-2.2.0/ cdh02:/opt/apps/
scp -r /opt/apps/spark-2.2.0/ cdh03:/opt/apps/
//3.3 Sync the environment variables
scp -r /etc/profile cdh02:/etc/profile
scp -r /etc/profile cdh03:/etc/profile
source /etc/profile
//3.4 On cdh01, rename Spark's cluster start/stop scripts because they clash with Hadoop's scripts of the same name
[root@cdh01 sbin]# mv start-all.sh start-spark-all.sh
[root@cdh01 sbin]# mv stop-all.sh stop-spark-all.sh
//4. Operate the Spark cluster
//1. Start the cluster
[root@cdh01 sbin]# start-spark-all.sh
//2. Test the cluster (a hedged spark-submit check follows)
http://192.168.10.101:8080
//3. Stop the cluster
[root@cdh01 sbin]# stop-spark-all.sh
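A hedged job submission to confirm the standalone cluster works; the examples jar path below assumes the layout of the spark-2.2.0-bin-hadoop2.7 package:
spark-submit --master spark://cdh01:7077 \
  --class org.apache.spark.examples.SparkPi \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.2.0.jar 100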
10.4 Spark HA
ZooKeeper-based HA setup
1. Only spark-env.sh needs to change
vi /opt/apps/spark-2.2.0/conf/spark-env.sh
Add/adjust the following:
export JAVA_HOME=/opt/apps/jdk1.8.0_45
export SCALA_HOME=/opt/apps/scala-2.11.8
#export SPARK_MASTER_IP=cdh01
#export SPARK_MASTER_PORT=7077 ## RPC port, analogous to HDFS's 9000, not the web UI port
export SPARK_WORKER_CORES=2 ## CPU cores per worker
export SPARK_WORKER_INSTANCES=1 ## worker instances per machine
export SPARK_WORKER_MEMORY=1g ## memory per worker
export HADOOP_CONF_DIR=/opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop
## Spark HA settings
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER
-Dspark.deploy.zookeeper.url=cdh01,cdh02,cdh03 -Dspark.deploy.zookeeper.dir=/spark"
2. Distribute the file
scp spark-env.sh cdh02:/opt/apps/spark-2.2.0/conf/
3. Start and test
Start ZooKeeper on all three machines
zkServer.sh start
start-spark-all.sh
10.5 Notes and pitfalls
1. Error when starting the HA or standalone cluster
#cdh03: failed to launch: nice -n 0 /opt/apps/spark-2.2.0/bin/spark-class org.apache.spark.deploy.worker.Worker --webui-port 8081 spark://cdh01:7077
## Fix 1: edit /root/.bashrc
vi ~/.bashrc
add JAVA_HOME to the file
JAVA_HOME=/opt/apps/jdk1.8.0_45
export SPARK_HOME=/opt/apps/spark-2.2.0
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
then source it
source ~/.bashrc
## Fix 2: edit spark-config.sh
add JAVA_HOME in this file
vi /opt/apps/spark-2.2.0/sbin/spark-config.sh
JAVA_HOME=/opt/apps/jdk1.8.0_45
sync this file to all three machines and restart Spark
scp spark-config.sh cdh02:/opt/apps/spark-2.2.0/sbin/
check the web UI at cdh01:8080
2. Spark 2 conflicts with Hive: starting Hive reports
ls: cannot access /opt/apps/spark-2.2.0/lib/spark-assembly-*.jar: No such file or directory
Fix:
##1. Go into Hive's bin directory and edit the hive launch script
cd /opt/apps/hive-1.1.0-cdh5.7.6/bin/
vi hive
find (around line 114) and change
sparkAssemblyPath=`ls ${SPARK_HOME}/lib/spark-assembly-*.jar`
to
sparkAssemblyPath=`ls ${SPARK_HOME}/jars/*.jar`
##2. Save the change and restart Hive
11. Install Kafka
11.1 Install ZooKeeper
11.2 Install Kafka
11.2.1 Unpack, install, and configure the environment variables
tar -zxvf kafka_2.11-1.1.1.tgz -C /opt/apps/
mv kafka_2.11-1.1.1/ kafka-2.11
vi /etc/profile
11.2.2 Edit server.properties
vi /opt/apps/kafka-2.11/config/server.properties
Change broker.id
############################# Server Basics #############################
# The id of the broker. This must be set to a unique integer for each broker.
broker.id=1
Change log.dirs
############################# Log Basics #############################
# A comma separated list of directories under which to store log files
log.dirs=/opt/apps/kafka-2.11/data/kafka
Change zookeeper.connect
############################# Zookeeper #############################
# Zookeeper connection string (see zookeeper docs for details).
# This is a comma separated host:port pairs, each corresponding to a zk
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
# You can also append an optional chroot string to the urls to specify the
# root directory for all kafka znodes.
zookeeper.connect=cdh01,cdh02,cdh03/kafka
11.2.3 Distribute Kafka and the environment variables, then change broker.id in server.properties on each machine
scp /etc/profile cdh02:/etc/
source /etc/profile
scp -r /opt/apps/kafka-2.11/ cdh02:/opt/apps/
vi /opt/apps/kafka-2.11/config/server.properties
11.2.4 Start Kafka
#1. Start ZooKeeper on all three machines
zkServer.sh start
#2. Start Kafka on all three machines
kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties
# stop
kafka-server-stop.sh stop
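A hedged smoke test for the broker cluster (note the /kafka chroot from zookeeper.connect; the topic name is just an example and the listener port is assumed to be the default 9092):
kafka-topics.sh --zookeeper cdh01:2181,cdh02:2181,cdh03:2181/kafka --create --topic smoke_test --partitions 3 --replication-factor 2
kafka-topics.sh --zookeeper cdh01:2181,cdh02:2181,cdh03:2181/kafka --list
kafka-console-producer.sh --broker-list cdh01:9092 --topic smoke_test
kafka-console-consumer.sh --bootstrap-server cdh01:9092 --topic smoke_test --from-beginning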
11.2.5 Notes
#tip: if the Kafka cluster keeps going down
check whether broker.id in /opt/apps/kafka-2.11/data/kafka/meta.properties matches the one configured in server.properties!
vi /opt/apps/kafka-2.11/data/kafka/meta.properties
or delete the data/kafka directory and restart
12. Install Redis
Standalone installation
1. Unpack
tar -zxvf redis-3.2.8.tar.gz -C /opt/apps/
2. Install the build dependencies
yum -y install gcc-c++
3. Compile in the Redis root directory
make
make PREFIX=/opt/apps/redis-3.2.8 install
4. Start and test (a hedged set/get check follows)
[root@cdh01 bin]# ./redis-server
[root@cdh01 bin]# ./redis-cli
127.0.0.1:6379> ping
PONG
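A hedged set/get check from the same bin directory:
./redis-cli set k1 v1
./redis-cli get k1    # should print "v1"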
13. Install Elasticsearch
13.1 Install
1. Unpack
tar -zxvf elasticsearch-6.5.3.tar.gz -C /opt/apps/
2. Configure the environment variables
vi /etc/profile
...
export ELASTICSEARCH_HOME=/opt/apps/elasticsearch-6.5.3
export PATH=$PATH:$ELASTICSEARCH_HOME/bin
...
source /etc/profile
3. Configure config/elasticsearch.yml
vi /opt/apps/elasticsearch-6.5.3/config/elasticsearch.yml
cluster.name: es-bigdata
node.name: cdh01
node.master: true
node.data: true
path.data: /opt/apps/elasticsearch-6.5.3/data
path.logs: /opt/apps/elasticsearch-6.5.3/logs
network.host: 0.0.0.0
discovery.zen.ping.unicast.hosts: ["cdh01", "cdh02", "cdh03"]
4. Create a regular user
useradd cdh01
passwd cdh01
123456   # password
5. Grant the new user sudo rights
vi /etc/sudoers
## Allow root to run any commands anywhere
root ALL=(ALL) ALL
cdh01 ALL=(ALL) ALL
6. Give the user ownership of the Elasticsearch directory
chown -R cdh01:cdh01 /opt/apps/elasticsearch-6.5.3
7. Distribute to the other two machines, then adjust the environment variables, ownership, and the yml file (node.name) on each
scp -r /opt/apps/elasticsearch-6.5.3 cdh02:/opt/apps/
chown -R cdh02:cdh02 /opt/apps/elasticsearch-6.5.3
chown -R cdh03:cdh03 /opt/apps/elasticsearch-6.5.3
8. Start and test (expect three errors; fixes below)
# switch to the regular user before starting!
run from Elasticsearch's bin directory:
elasticsearch
# Web UI: http://<ip>:9200
13.2 Fixing the startup errors
#1. max file descriptors [4096] for elasticsearch process likely too low, increase to at least [65536]
vi /etc/security/limits.conf
# add the following (the leading * must not be omitted):
* soft nofile 65536
* hard nofile 131072
* soft nproc 2048
* hard nproc 4096
#2. max number of threads [1024] for user [judy2] likely too low, increase to at least [4096]  (not needed on CentOS 7.x)
vi /etc/security/limits.d/90-nproc.conf
change the following:
* soft nproc 1024
# to
* soft nproc 4096
#3. max virtual memory areas vm.max_map_count [65530] likely too low, increase to at least [262144]  (not needed on CentOS 7.x)
append one line at the end of /etc/sysctl.conf
vm.max_map_count=262144
After the changes, reboot the VM, switch to the regular user, and start Elasticsearch again
Check http://<ip>:9200 in a browser (a hedged curl check follows)
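A hedged check from the command line using the standard Elasticsearch REST endpoints:
curl http://cdh01:9200/_cluster/health?pretty
curl http://cdh01:9200/_cat/nodes?v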
13.3 Install Kibana (Elasticsearch companion)
tar -zxvf kibana-6.5.3-linux-x86_64.tar.gz -C /opt/apps/
mv kibana-6.5.3-linux-x86_64/ kibana-6.5.3
export KIBANA_HOME=/opt/apps/kibana-6.5.3
export PATH=$PATH:$KIBANA_HOME/bin
Configure config/kibana.yml
vi /opt/apps/kibana-6.5.3/config/kibana.yml
# Kibana is served by a back end server. This setting specifies the port to use.
server.port: 5601
# To allow connections from remote users, set this parameter to a non-loopback address.
server.host: "192.168.10.101"
# The Kibana server's name. This is used for display purposes.
server.name: "cdh01"
# The URL of the Elasticsearch instance to use for all your queries.
elasticsearch.url: "http://cdh01:9200"
前台启动测试 kibana serve
后台启动
nohup kibana serve > /dev/null 2>&1 &
查看进程
ps -ef | grep kibana
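A quick way to confirm Kibana is actually serving without opening a browser (assumes the server.host/server.port configured above):
curl -I http://192.168.10.101:5601
curl http://192.168.10.101:5601/api/status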
13.4中文分词器ik安装
##1. 安装解压工具
yum -y install unzip
##2. 上传ik分词器
##3. 将ik分词器拷贝到es的plugins目录
mkdir -p /opt/apps/elasticsearch-6.5.3/plugins/ik && mv /opt/software/elasticsearch-analysis-ik-6.5.3.zip /opt/apps/elasticsearch-6.5.3/plugins/ik && cd /opt/apps/elasticsearch-6.5.3/plugins/ik
##4. 解压
unzip elasticsearch-analysis-ik-6.5.3.zip && rm -f elasticsearch-analysis-ik-6.5.3.zip
##5. 分发
scp -r /opt/apps/elasticsearch-6.5.3/plugins/ik cdh02:/opt/apps/elasticsearch-6.5.3/plugins/ && scp -r /opt/apps/elasticsearch-6.5.3/plugins/ik cdh03:/opt/apps/elasticsearch-6.5.3/plugins/
##6. 重启es集群
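After the restart, the ik analyzer can be verified through the standard _analyze API; a minimal sketch with sample text:
curl -H 'Content-Type: application/json' -XPOST 'http://cdh01:9200/_analyze?pretty' -d '{"analyzer":"ik_max_word","text":"中华人民共和国"}'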
14.安装Promethus+Supervisor+Grafana
14.1promethus
##Prometheus安装
# 1. 创建目录,放置Prometheus
cd /opt/software
# 2. 将tar包上传到此目录
# 3. 解压
tar -xvzf prometheus-2.17.1.linux-amd64.tar.gz -C /opt/apps
# 4. create a symlink
cd /opt/apps/ && ln -s prometheus-2.17.1.linux-amd64 prom
# 5. 创建目录,存放Prometheus 拉取过来的数据,我们这里选择local storage
mkdir -p /opt/data1/prometheus/data/
# 6. 启动prometheus
/opt/apps/prom/prometheus \
--storage.tsdb.path="/opt/data1/prometheus/data/" \
--log.level=debug \
--web.enable-lifecycle \
--web.enable-admin-api \
--config.file=/opt/apps/prom/prometheus.yml
## tip: what the flags above do
--storage.tsdb.path : base path for metric storage
--log.level : only log messages with the given severity or above; one of [debug, info, warn, error]
--web.enable-lifecycle : allow shutting down / reloading Prometheus via HTTP requests
--web.enable-admin-api : enable the admin HTTP API
--config.file : path to the Prometheus configuration file
webUI
ip:9090
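Prometheus can also be probed from the shell; a minimal check (the /-/reload endpoint only works because --web.enable-lifecycle was passed above):
curl http://192.168.10.101:9090/-/healthy
curl http://192.168.10.101:9090/-/ready
curl -X POST http://192.168.10.101:9090/-/reload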
14.2supervisor
## Install supervisor
1.
# install via yum; first install the EPEL repo
yum install -y epel-release
# install supervisor
yum install -y supervisor
# after installation the default config file is /etc/supervisord.conf; we will write our own, so back up the default first
mv /etc/supervisord.conf /etc/supervisord.conf.bak
# then put our own supervisord.conf under /etc/
2. (For reference, the key change versus the backed-up default config is enabling the HTTP server:)
[inet_http_server]
port=0.0.0.0:9001
3. vi /etc/supervisord.conf
; filename supervisord.conf
[unix_http_server]
file=/var/run/supervisor/supervisor.sock ; socket file path; supervisorctl talks to supervisord over XML-RPC through this socket, so it must be set, otherwise supervisorctl is unusable
;chmod=0700 ; socket文件的mode,默认是0700
;chown=nobody:nogroup ; socket文件的owner,格式:uid:gid
[inet_http_server] ; TCP socket to listen on; required for the web UI or remote supervisorctl; disabled by default, we enable it here
port=0.0.0.0:9001 ; 监听的地址和端口
;username=user ; 可以指定用户名密码,我们这里不开启了
;password=123 ;
[supervisord] ; supervisord 主进程的相关配置
logfile=/var/log/supervisor/supervisord.log ; 主进程的日志文件
logfile_maxbytes=50MB ; 日志文件多大后会滚动生成一个新的日志文件 默认50MB
logfile_backups=10 ; 最多备份多少个日志文件,默认10 个
loglevel=info ; log level; default info; others: debug,warn,trace
pidfile=/var/run/supervisor/supervisord.pid ;主进程的 pid文件路径
nodaemon=false ; 主进程是否在前台运行,默认是false,在后台运行
minfds=1024 ; 最少系统空闲的文件描述符,低于这个值,supervisor将不会启动 默认 1024
minprocs=1024 ; 最小可用进程描述符 低于这个值,supervisor将不会启动 默认200
user=root ; 启动supervisord 的用户
[rpcinterface:supervisor]; 这个选项是给XML_RPC用的,如果使用supervisord 和webserver这个选项必须开启
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///var/run/supervisor/supervisor.sock ; used for local supervisorctl connections; must match [unix_http_server]
;serverurl=http://192.168.10.101:9001 ; uncomment (and comment out the line above) to connect to a remote supervisord instead; only one serverurl may be active
;username= ; 可以指定用户名密码,我们这里不开启了
;password= ;
[include] ; split-out config files: keep each application's config in its own file for clarity; supervisord loads every file matching the pattern below
files = /etc/supervisord.d/*.conf ; 匹配 /etc/supervisord.d/ 下所有以 .conf 结尾的文件
4.
# 创建pid文件
mkdir -p /var/run/supervisor/
touch /var/run/supervisor/supervisord.pid
# 创建分类配置目录
mkdir -p /etc/supervisord.d/
# 创建日志目录
mkdir -p /var/log/supervisor/
5. Create prometheus.conf under /etc/supervisord.d/
vim /etc/supervisord.d/prometheus.conf
; prometheus 启动配置文件
[program:prometheus] ; 我们的应用配置格式为 program:我们应用名称(自己定义)
directory=/opt/apps/prom ; 运行程序前会切换到配置的目录中
command=/opt/apps/prom/prometheus --storage.tsdb.path="/opt/data1/prometheus/data/" --log.level=debug --web.enable-lifecycle --web.enable-admin-api --config.file=/opt/apps/prom/prometheus.yml ; 我们要执行的命令,这就是之前我们再前台启动prometheus的命令
stderr_logfile=/var/log/supervisor/prometheus.err ;错误日志文件
stdout_logfile=/var/log/supervisor/prometheus.log ;标准输出日志文件,我们通过该文件查看Prometheus运行日志
stdout_logfile_maxbytes=10MB ; 标准输出日志文件多大滚动一次
stdout_logfile_backups=10 ; 标准输出日志文件最多备份多少个
user=root ; 以什么用户启动
autostart=true ; 是否在supervisord启动时,直接就启动应用
autorestart=true ; crash 后是否自动重启
startsecs=10 ;应用进程启动多少秒之后,此时状态如果是running状态,就认为是成功
startretries=3 ; 当进程启动失败后,最大尝试启动的次数, 如果超过指定次数,应用会被标记为Fail状态
stopasgroup=true ; 是否停止由应用本身创建的子进程,此选项接受的停止信号是stop信号
killasgroup=true ; 是否停止由应用本身创建的子进程,此选项接受的停止信号是SIGKILL信号
redirect_stderr=false ; 如果是true,stderr的日志会被写入stdout日志文件中
6.启动测试supervisor WebUI:ip:9001
# 开机启动
systemctl enable supervisord
# 启动supervisord
systemctl start supervisord
# 查看启动状态
systemctl status supervisord
# 查看tcp端口是否在监听中
netstat -antp |grep 9001
# after placing the prometheus.conf written above into /etc/supervisord.d/, run the following commands
# 读取配置文件
supervisorctl reread
# 更新启动Prometheus
supervisorctl update prometheus
# check the status; if everything is OK the process is shown as RUNNING
supervisorctl status prometheus
# to stop prometheus
supervisorctl stop prometheus
# 如果想再次启动
supervisorctl start prometheus
# 注意一旦你修改了配置文件内容,一定要先reread,然后 update 就可以了
14.3grafana
##Grafana安装
1.
# 上传/opt/software并解压
tar -xvzf grafana-6.7.3.linux-amd64.tar.gz -C /opt/apps && cd /opt/apps
# create a symlink
ln -s grafana-6.7.3 graf && cd /opt/apps/graf
# copy 一个配置,对于grafana我们不做更多的配置说明,直接copy一个默认的配置
cp /opt/apps/graf/conf/sample.ini /opt/apps/graf/conf/grafana.ini
2.supervisor管理grafana的配置
vi /etc/supervisord.d/grafana.conf
; grafana 启动配置文件
[program:grafana] ; 我们的应用配置格式为 program:我们应用名称(自己定义)
directory=/opt/apps/graf/ ; 运行程序前会切换到配置的目录中
command=sh -c "bin/grafana-server -config conf/grafana.ini" ; 我们要执行的命令
stderr_logfile=/var/log/supervisor/grafana.err ;错误日志文件
stdout_logfile=/var/log/supervisor/grafana.log ;标准输出日志文件,我们通过该文件查看grafana运行日志
stdout_logfile_maxbytes=10MB ; 标准输出日志文件多大滚动一次
stdout_logfile_backups=10 ; 标准输出日志文件最多备份多少个
user=root ; 以什么用户启动
autostart=true ; 是否在supervisord启动时,直接就启动应用
autorestart=true ; crash 后是否自动重启
startsecs=10 ;应用进程启动多少秒之后,此时状态如果是running状态,就认为是成功
startretries=3 ; 当进程启动失败后,最大尝试启动的次数, 如果超过指定次数,应用会被标记为Fail状态
stopasgroup=true ; 是否停止由应用本身创建的子进程,此选项接受的停止信号是stop信号
killasgroup=true ; 是否停止由应用本身创建的子进程,此选项接受的停止信号是SIGKILL信号
redirect_stderr=false ; 如果是true,stderr的日志会被写入stdout日志文件中
3.启动测试 WebUI:ip:3000 用户名密码admin
# 将上述编写的 grafana.conf 配置文件,放到 /etc/supervisord.d/ 文件夹下
# 执行如下命令
# 加载grafana配置
supervisorctl reread
# 更新启动grafana
supervisorctl update grafana
# 查看启动状态
supervisorctl status
# 如果想停止grafana
supervisorctl stop grafana
# 如果想再次启动
supervisorctl start grafana
# 注意一旦你修改了配置文件内容,一定要先reread,然后 update 就可以了
4. Open 192.168.10.101:3000 and add our Prometheus data source to grafana
Home page -> Add data source -> Prometheus -> HTTP: URL: http://192.168.10.101:9090 -> Save & Test -> Back
14.4 Monitoring nginx with grafana
1.
# 1. copy nginx-lua-prometheus.tgz onto the server
# 2. extract it to /opt/apps
tar -xvzf nginx-lua-prometheus.tgz -C /opt/apps && rm -rf nginx-lua-prometheus.tgz
# 3. edit collect-app/conf/nginx.conf
# add the lua prometheus library inside the http block of /opt/apps/collect-app/conf/nginx.conf:
vi /opt/apps/collect-app/conf/nginx.conf
lua_package_path "/opt/apps/nginx-lua-prometheus/?.lua;;";
2.创建metric.conf文件
vi /opt/apps/collect-app/conf/vhost/metric.conf
lua_shared_dict prometheus_metrics 10M;
init_by_lua_block {
-- 初始化Prometheus
prometheus = require("prometheus").init("prometheus_metrics")
-- define a counter metric that counts HTTP requests, labelled by host, port, request path and status code
http_requests_endpoint = prometheus:counter("nginx_http_requests_endpoint", "Number of HTTP requests_endpoint",{"host","port","endpoint", "status"})
-- define a histogram metric that records HTTP request time, with the same labels: host, port, request path and status code
-- use the default 20 buckets between 5ms and 10s to record the request-time distribution
http_request_time = prometheus:histogram("nginx_http_request_time","HTTP request time"
,{"host","port","endpoint", "status"})
-- 定义一个gauge类型metric,记录nginx的连接数,标签为nginx的连接状态
http_connections = prometheus:gauge("nginx_http_connections","Number of HTTP connections", {"state"})
}
init_worker_by_lua 'prometheus:init_worker()';
log_by_lua_block {
-- 请求的主机名
local host = ngx.var.host
-- 请求的url路径
local endpoint = ngx.var.uri
-- 状态码
local status = ngx.var.status
-- 端口号
local port = ngx.var.server_port
-- 如果请求是一些静态文件,则统一归并为一类
if string.find(endpoint, "static") ~= nil or string.find(endpoint, ".js") ~= nil or string.find(endpoint, ".css") ~= nil or string.find(endpoint, ".jpg") ~= nil or string.find(endpoint, ".html") ~= nil or string.find(endpoint, ".ico") ~= nil then
endpoint = "static"
status = "static"
else
endpoint = ngx.var.uri
end
-- 请求数的 metric
if endpoint ~= nil then
http_requests_endpoint:inc(1, {host,port,endpoint,status})
end
local request_time = ngx.var.request_time
-- 请求时间的 metric
if endpoint ~= nil and request_time~= nil then
http_request_time:observe(tonumber(request_time), {host,port,endpoint,status})
end
}
server {
listen 9527;
# 暴露metrics 接口给Prometheus 拉取数据
location /metrics {
content_by_lua_block {
-- nginx 连接状态
if ngx.var.connections_active ~= nil then
http_connections:set(ngx.var.connections_active, {"active"})
http_connections:set(ngx.var.connections_reading, {"reading"})
http_connections:set(ngx.var.connections_waiting, {"waiting"})
http_connections:set(ngx.var.connections_writing, {"writing"})
end
prometheus:collect()
}
}
}
3. Load the metrics endpoint configuration
# test the configuration first
openresty -p /opt/apps/collect-app/ -t
# if the test passes, reload the configuration (the collect-app nginx should already be running; start it first if not)
openresty -p /opt/apps/collect-app/ -s reload
# verify: request the metrics endpoint we just configured; metric output like the following means everything is OK
curl 192.168.10.101:9527/metrics
4. Modify /opt/apps/prom/prometheus.yml (delete its contents and paste the following)
vi /opt/apps/prom/prometheus.yml
# filename : prometheus.yml
# 全局配置
global:
scrape_interval: 15s # 设置每15秒pull一次数据,默认是1min
# 每一个job的配置,
scrape_configs:
# 默认的metric路径是 '/metrics'
# scheme defaults to 'http'.
# Prometheus 自己的metric
- job_name: 'prometheus'
static_configs:
- targets: ['192.168.10.101:9090']
- job_name: 'collect-app-nginx'
scrape_interval: 5s
static_configs:
- targets: ['192.168.10.101:9527']
5. Hot-reload the Prometheus configuration
curl -X POST http://192.168.10.101:9090/-/reload
6. Open 192.168.10.101:3000 and import the dashboard JSON
+ -> Upload .json file -> import collect-app-nginx.json -> Load
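Once the nginx job is being scraped, the metrics defined in metric.conf can be queried through the Prometheus HTTP API; a sketch with example PromQL (the metric names come from the lua config above, the time windows are arbitrary):
# requests per second, per endpoint and status
curl http://192.168.10.101:9090/api/v1/query --data-urlencode 'query=sum(rate(nginx_http_requests_endpoint[1m])) by (endpoint, status)'
# 95th percentile request time per endpoint
curl http://192.168.10.101:9090/api/v1/query --data-urlencode 'query=histogram_quantile(0.95, sum(rate(nginx_http_request_time_bucket[5m])) by (le, endpoint))'
# current active connections
curl http://192.168.10.101:9090/api/v1/query --data-urlencode 'query=nginx_http_connections{state="active"}'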
15.安装presto
15.1server
#解压安装服务端
tar -zxvf /opt/software/presto/presto-server-0.236.tar.gz -C /opt/apps
# create a symlink
ln -s /opt/apps/presto-server-0.236/ /opt/apps/presto-server
# 安装目录下创建etc目录
cd /opt/apps/presto-server && mkdir etc
# 创建节点数据目录
mkdir -p /data1/presto/data
ln -s /opt/apps/presto-server-0.236/etc /data1/presto/data/etc
ln -s /opt/apps/presto-server-0.236/plugin /data1/presto/data/plugin
# 接下来创建配置文件
cd /opt/apps/presto-server/etc/
# config.properties - presto server configuration
[root@zhuguofu etc]#
cat << EOF > config.properties
coordinator=true
node-scheduler.include-coordinator=true
# 端口默认是8080
http-server.http.port=8082
# 单个查询在整个集群上够使用的最大用户内存
query.max-memory=3GB
# 单个查询在每个节点上可以使用的最大用户内存
query.max-memory-per-node=1GB
# 单个查询在每个节点上可以使用的最大用户内存+系统内存(user memory: hash join,agg等,system memory:input/output/exchange buffers等)
query.max-total-memory-per-node=2GB
discovery-server.enabled=true
discovery.uri=http://0.0.0.0:8082
EOF
# node.properties 节点配置
[root@zhuguofu etc]#
cat << EOF > node.properties
node.environment=production
node.id=zhuguofu
node.data-dir=/data1/presto/data
EOF
#jvm.config 配置,注意-DHADOOP_USER_NAME配置,替换为你需要访问hdfs的用户
[root@zhuguofu etc]#
cat << EOF > jvm.config
-server
-Xmx3G
-XX:+UseG1GC
-XX:G1HeapRegionSize=32M
-XX:+UseGCOverheadLimit
-XX:+ExplicitGCInvokesConcurrent
-XX:+HeapDumpOnOutOfMemoryError
-XX:+ExitOnOutOfMemoryError
-DHADOOP_USER_NAME=root
EOF
#log.properties
#default level is INFO. `ERROR`,`WARN`,`DEBUG`
[root@zhuguofu etc]#
cat << EOF > log.properties
com.facebook.presto=INFO
EOF
# catalog配置,就是各种数据源的配置,我们使用hive,注意替换为你自己的thrift地址
mkdir -p /opt/apps/presto-server/etc/catalog
[root@hadoop01 etc]#
cat <<EOF > catalog/hive.properties
connector.name=hive-hadoop2
hive.metastore.uri=thrift://192.168.10.101:9083
hive.allow-drop-table=true
hive.parquet.use-column-names=true
hive.allow-rename-column=true
hive.allow-rename-table=true
EOF
# add hudi support --- the jar must go into the plugin directory named hive-hadoop2 (it matches the connector name); do not rename it
wget -P /opt/apps/presto-server/plugin/hive-hadoop2 http://doc.yihongyeyan.com/qf/project/soft/hudi/hudi-presto-bundle-0.5.2-incubating.jar
#启动presto-server
[root@zhuguofu presto-server]# ./bin/launcher start
[root@zhuguofu presto-server]# ./bin/launcher stop
#可以配置环境变量
export PRESTO_HOME=/opt/apps/presto-server
export PATH=$PATH:$PRESTO_HOME/bin
#启动
launcher start
launcher stop
15.2client
# 客户端安装
mkdir -p /opt/apps/presto
wget -P /opt/apps/presto/ http://doc.yihongyeyan.com/qf/project/soft/presto/presto-cli-0.236-executable.jar
cd /opt/apps/presto/
[root@zhuguofu presto]# mv presto-cli-0.236-executable.jar presto
[root@zhuguofu presto]# chmod u+x presto
[root@zhuguofu presto]# ln -s /opt/apps/presto/presto /usr/bin/presto
# 至此presto 安装完毕
#连接presto
[root@zhuguofu presto-server]# presto --server zhuguofu:8082 --catalog hive --schema ods_news
show tables;
show schemas;
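For scripted checks the CLI also accepts --execute, which runs a statement and exits; a small sketch (it assumes the hive metastore is reachable and the ods_news schema exists):
presto --server zhuguofu:8082 --catalog hive --schema ods_news --execute "show tables"
# list the coordinator/worker nodes registered in the cluster
presto --server zhuguofu:8082 --execute "select * from system.runtime.nodes"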
15.3 Appendix: upgrading Hive
### Replace hive1.2.1 with hive2.3.7
#### 15.3.1 Clear the old warehouse directory
hdfs dfs -rm -r -f /user/root/warehouse
#### 15.3.2 Configure hive-site.xml
<configuration>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>192.168.10.101</value>
</property>
<property>
<name>hive.server2.long.polling.timeout</name>
<value>50000</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://zhuguofu:9083</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/root/warehouse</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://zhuguofu:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<!--是否在Hive提示中包含当前数据库-->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<!--让Hive决定是否在本地模式下自动运行-->
<property>
<name>hive.exec.mode.local.auto</name>
<value>true</value>
</property>
<!--hive-site.xml添加-->
<!--关闭metastore版本校验 hive2.x版本添加-->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<!--hive2.x版本添加-->
<property>
<name>datanucleus.metadata.validate</name>
<value>false</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
</configuration>
### 15.3.3hadoop的core-site.xml
#vi /opt/apps/hadoop-2.8.1/etc/hadoop/core-site.xml
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
hdfs-site.xml添加
<!-- WebHDFS (REST API) in Namenodes and Datanodes. -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
16.安装clickhouse+chproxy
16.1单机clickhouse
### 1.yum安装
#1.
yum install yum-utils
#2.
rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG
#3.
yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64
#4.
yum install clickhouse-server clickhouse-client
### 2.rpm安装
#将clickhouse目录下的四个rpm上传到/home/clickhouse下
ll /home/clickhouse/*
#安装(不考虑顺序)
yum -y install /home/clickhouse/clickhouse-*.rpm
#顺序安装
#单个安装,需要考虑依赖顺序
#1.
yum -y install /home/clickhouse/clickhouse-common-static-20.5.2.7-2.x86_64.rpm
#2.
yum -y install /home/clickhouse/clickhouse-client-20.5.2.7-2.noarch.rpm
#3.
yum -y install /home/clickhouse/clickhouse-server-20.5.2.7-2.noarch.rpm
#4.
yum -y install /home/clickhouse/clickhouse-test-20.5.2.7-2.noarch.rpm
tip:安装perl(JSON::XS)这个依赖包
yum install -y epel-release perl-JSON-XS
## 2.修改配置文件
### 2.1config.xml
mv /etc/clickhouse-server/config.xml /etc/clickhouse-server/config.xml_bak
vi /etc/clickhouse-server/config.xml
#覆盖内容如下
<?xml version="1.0"?>
<yandex>
<!-- 配置日志文件 -->
<logger>
<level>trace</level>
<log>/etc/clickhouse-server/logs/server.log</log>
<errorlog>/etc/clickhouse-server/logs/error.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<!--配置http、tcp端口和监听地址-->
<http_port>8123</http_port>
<tcp_port>9009</tcp_port>
<interserver_http_port>9010</interserver_http_port>
<listen_host>::</listen_host>
<path>/etc/clickhouse-server/clickhousedata/</path>
<tmp_path>/etc/clickhouse-server/tmp/</tmp_path>
<users_config>users.xml</users_config>
<default_profile>default</default_profile>
<default_database>default</default_database>
<remote_servers incl="clickhouse_remote_servers" />
<zookeeper incl="zookeeper-servers" optional="true" />
<macros incl="macros" optional="true" />
<include_from>/etc/clickhouse-server/metrika.xml</include_from>
<mark_cache_size>5368709120</mark_cache_size>
</yandex>
### 2.2metrika.xml
vi /etc/clickhouse-server/metrika.xml
#覆盖如下内容即可
<?xml version="1.0"?>
<yandex>
<clickhouse_remote_servers>
<news_ck_cluster>
<shard>
<weight>1</weight>
<internal_replication>false</internal_replication>
<replica>
<host>192.168.10.101</host>
<port>9009</port>
<user>default</user>
<password>123456</password>
</replica>
</shard>
</news_ck_cluster>
</clickhouse_remote_servers>
<macros>
<replica>192.168.10.101</replica><!--默认当前服务器ip即可-->
</macros>
<networks>
<ip>::/0</ip>
</networks>
<zookeeper-servers>
<node index="1">
<host>192.168.10.101</host>
<port>2181</port>
</node>
</zookeeper-servers>
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
### 2.3users.xml
mv /etc/clickhouse-server/users.xml /etc/clickhouse-server/users.xml_bak
vi /etc/clickhouse-server/users.xml
#覆盖如下内容
<?xml version="1.0"?>
<yandex>
<profiles>
<!-- 读写用户设置 -->
<default>
<max_memory_usage>10000000000</max_memory_usage>
<use_uncompressed_cache>0</use_uncompressed_cache>
<load_balancing>random</load_balancing>
</default>
<!-- 只读用户设置 -->
<readonly>
<max_memory_usage>10000000000</max_memory_usage>
<use_uncompressed_cache>0</use_uncompressed_cache>
<load_balancing>random</load_balancing>
<readonly>1</readonly>
</readonly>
</profiles>
<!-- 配额 -->
<quotas>
<!-- Name of quota. -->
<default>
<interval>
<duration>3600</duration>
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
<users>
<!-- 读写用户 -->
<default>
<password_sha256_hex>8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
<!-- 只读用户 -->
<ck>
<password_sha256_hex>8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>readonly</profile>
<quota>default</quota>
</ck>
</users>
</yandex>
## 3. Start the server
# start / check as a background service
service clickhouse-server start
service clickhouse-server status
# or, with CentOS 7.x commands
systemctl start clickhouse-server
systemctl status clickhouse-server
systemctl stop clickhouse-server
## recommended way to start (foreground, with our config file):
clickhouse-server --config-file=/etc/clickhouse-server/config.xml
## 4.客户端连接
clickhouse-client \
--host=localhost \
--port=9009 \
--user=default \
--password=123456
##操作
show databases;
show tables;
use default;
select * from system.clusters;
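A minimal end-to-end check beyond show databases: create a small MergeTree table, insert a row and read it back (t_test is just a throwaway example name):
clickhouse-client --host=localhost --port=9009 --user=default --password=123456 --multiquery --query="
CREATE TABLE IF NOT EXISTS default.t_test (id UInt32, name String, dt Date) ENGINE = MergeTree() PARTITION BY dt ORDER BY id;
INSERT INTO default.t_test VALUES (1, 'hello', today());
SELECT * FROM default.t_test;
DROP TABLE default.t_test;"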
16.2集群clickhouse
## 3.1 安装
其它服务器和单机版一样,执行clickhouse的安装操作。
[root@hadoop01 home]# scp -r /home/clickhouse/ hadoop02:/home/
[root@hadoop01 home]# scp -r /home/clickhouse/ hadoop03:/home/
# 安装依赖
[root@hadoop02 home]# yum install -y epel-release
[root@hadoop02 home]# yum install -y perl-JSON-XS
[root@hadoop03 home]# yum install -y epel-release
[root@hadoop03 home]# yum install -y perl-JSON-XS
# 安装clickhouse
[root@hadoop02 home]# yum -y install /home/clickhouse/clickhouse-*.rpm
[root@hadoop03 home]# yum -y install /home/clickhouse/clickhouse-*.rpm
## 3.2 配置
**创建 /etc/clickhouse-server/metrika.xml**
[root@hadoop01 home]# vi /etc/clickhouse-server/metrika.xml
#覆盖如下内容即可
<yandex>
<clickhouse_remote_servers>
<news_ck_cluster>
<shard>
<weight>1</weight>
<internal_replication>false</internal_replication>
<replica>
<host>192.168.10.101</host>
<port>9009</port>
<user>default</user>
<password>123456</password>
</replica>
<replica>
<host>192.168.10.102</host>
<port>9009</port>
<user>default</user>
<password>123456</password>
</replica>
</shard>
<shard>
<weight>1</weight>
<internal_replication>false</internal_replication>
<replica>
<host>192.168.10.102</host>
<port>9009</port>
<user>default</user>
<password>123456</password>
</replica>
<replica>
<host>192.168.10.103</host>
<port>9009</port>
<user>default</user>
<password>123456</password>
</replica>
</shard>
<shard>
<weight>1</weight>
<internal_replication>false</internal_replication>
<replica>
<host>192.168.10.101</host>
<port>9009</port>
<user>default</user>
<password>123456</password>
</replica>
<replica>
<host>192.168.10.103</host>
<port>9009</port>
<user>default</user>
<password>123456</password>
</replica>
</shard>
</news_ck_cluster>
</clickhouse_remote_servers>
<macros>
<replica>192.168.10.101</replica> <!--默认当前服务器ip即可-->
</macros>
<networks>
<ip>::/0</ip>
</networks>
<zookeeper-servers>
<node index="1">
<host>192.168.10.101</host>
<port>2181</port>
</node>
<node index="2">
<host>192.168.10.102</host>
<port>2181</port>
</node>
<node index="3">
<host>192.168.10.103</host>
<port>2181</port>
</node>
</zookeeper-servers>
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
**复制config.xml users.xml metrika.xml到其它服务器**
# copy配置文件
[root@hadoop01 clickhouse-server]# scp -r config.xml users.xml metrika.xml hadoop02:/etc/clickhouse-server
[root@hadoop01 clickhouse-server]# scp -r config.xml users.xml metrika.xml hadoop03:/etc/clickhouse-server
## 3.3 服务启动
推荐使用启动命令:
[root@hadoop01 home]# clickhouse-server --config-file=/etc/clickhouse-server/config.xml
[root@hadoop02 home]# clickhouse-server --config-file=/etc/clickhouse-server/config.xml
[root@hadoop03 home]# clickhouse-server --config-file=/etc/clickhouse-server/config.xml
## 3.4 客户端连接
[root@hadoop01 clickhouse-server]# clickhouse-client \
--host=localhost \
--port=9009 \
--user=default \
--password=123456
tip: cluster mode keeps its replication metadata in ZooKeeper, so start ZooKeeper before starting clickhouse.
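To confirm that the three shards defined in metrika.xml are visible, query system.clusters on any node; a minimal sketch:
clickhouse-client --host=localhost --port=9009 --user=default --password=123456 --query="SELECT cluster, shard_num, replica_num, host_address, port FROM system.clusters WHERE cluster = 'news_ck_cluster'"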
16.3安装chproxy
### Install Chproxy
# download Chproxy
mkdir -p /opt/apps/chproxy/
wget -P /opt/apps/chproxy/ http://doc.yihongyeyan.com/qf/project/soft/chproxy/chproxy-linux-amd64-v1.14.0.tar.gz
cd /opt/apps/chproxy/ && tar -xvzf chproxy-linux-amd64-v1.14.0.tar.gz
# (alternatively, if the tarball was already uploaded to /opt/software:)
# tar -zxvf /opt/software/chproxy-linux-amd64-v1.14.0.tar.gz -C /opt/apps/chproxy
# configure Chproxy: write the config below, replacing the allowed networks and host addresses with your own
Configuration file
cat << EOF > /opt/apps/chproxy/config.yml
hack_me_please: false
log_debug: true
server:
http:
#chproxy监听端口
listen_addr: ":8321"
# 允许访问的网段
allowed_networks: ["172.0.0.0/8","192.0.0.0/8"]
read_timeout: 3m
param_groups:
#jdbc url参数配置
- name: "qfq"
params:
- key: "max_query_size"
value: "100000000"
- key: "max_ast_elements"
value: "10000000"
- key: "max_expanded_ast_elements"
value: "10000000"
users:
#用户信息[写入权限],使用此用户名和密码连接clickhouse
- name: "qf-insert"
password: "123456"
to_cluster: "news_ck_cluster"
to_user: "default"
params: "qfq"
allow_cors: true
cache: "longterm"
# 用户信息[只读权限],使用此用户名和密码连接clickhouse,如果没有可以不写
- name: "qf-readonly"
password: "123456"
to_cluster: "readonly-cluster"
to_user: "readonly"
cache: "shotterm"
params: "qfq"
allow_cors: true
max_concurrent_queries: 200
max_execution_time: 2m
clusters:
#集群信息[写入权限],注意替换为你自己的clickhouse地址
- name: "news_ck_cluster"
#clichouse集群地址,多个以逗号分隔
nodes: [
"192.168.10.101:8123"
]
# clickhouse 中你配置的用户名和密码
users:
- name: "default"
password: "123456"
#clichouse集群地址[只读],多个以逗号分隔,如果没有可以不写
- name: "readonly-cluster"
nodes: [
"192.168.10.101:8123"
]
# clickhouse 中你配置的用户名和密码
users:
- name: "readonly"
password: "123456"
caches:
# chproxy自己的缓存,默认是在磁盘上
- name: "longterm"
dir: "/data1/chproxy/cache/"
max_size: 950Mb
expire: 30s
- name: "shotterm"
dir: "/data1/chproxy/cache-shot/"
max_size: 950Mb
expire: 30s
EOF
启动测试
/opt/apps/chproxy/chproxy --config /opt/apps/chproxy/config.yml
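To check that chproxy really forwards to clickhouse, send a query through its HTTP port with the qf-insert user defined in config.yml (a sketch; the request must come from one of the allowed_networks):
echo 'SELECT 1' | curl 'http://192.168.10.101:8321/?user=qf-insert&password=123456' --data-binary @-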
17.安装docker+milvus
17.1安装docker
# 执行如下命令安装docker-ce yum 源
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# 安装docker-ce
yum makecache fast
yum install -y docker-ce
# docker 服务配置目录
mkdir /etc/docker/
mkdir /data1/docker/
# write the docker daemon configuration (note the path: /etc/docker/daemon.json)
cat << EOF > /etc/docker/daemon.json
{"selinux-enabled": false,"debug": true,"registry-mirrors": ["https://reg-mirror.qiniu.com","https://registry.docker-cn.com"],"insecure-registries": ["https://reg-mirror.qiniu.com"],"graph": "/data1/docker"}
EOF
# 执行如下命令,启动docker
systemctl start docker
#测试是否ok
docker run hello-world
# 查看容器运行状态
sudo docker ps
17.2安装milvus
##安装milvus
# 执行如下命令,安装milvus
docker pull milvusdb/milvus:0.10.0-cpu-d061620-5f3c00
# 配置milvus
mkdir -p /home/$USER/milvus/conf
cd /home/$USER/milvus/conf
## if wget cannot download it, just paste the contents of server_config.yaml into the file manually
wget https://raw.githubusercontent.com/milvus-io/milvus/v0.10.0/core/conf/demo/server_config.yaml
# 启动milvus容器
docker run -d --name milvus_cpu_0.10.0 \
-p 19530:19530 \
-p 19121:19121 \
-v /home/$USER/milvus/db:/var/lib/milvus/db \
-v /home/$USER/milvus/conf:/var/lib/milvus/conf \
-v /home/$USER/milvus/logs:/var/lib/milvus/logs \
-v /home/$USER/milvus/wal:/var/lib/milvus/wal \
milvusdb/milvus:0.10.0-cpu-d061620-5f3c00
# 查看容器运行状态
docker ps
# 查看日志
docker logs <milvus container id>
tip: day-to-day milvus operations
start docker
systemctl start docker
start milvus
## the --name must be unique (the container above already uses milvus_cpu_0.10.0)
docker run -d --name milvus_cpu_0.10.0.2 \
-p 19530:19530 \
-p 19121:19121 \
-v /home/$USER/milvus/db:/var/lib/milvus/db \
-v /home/$USER/milvus/conf:/var/lib/milvus/conf \
-v /home/$USER/milvus/logs:/var/lib/milvus/logs \
-v /home/$USER/milvus/wal:/var/lib/milvus/wal \
milvusdb/milvus:0.10.0-cpu-d061620-5f3c00
17.3安装错误解决
#######错误解决 不出错跳过
####错误1
Error downloading packages:
3:docker-ce-19.03.12-3.el7.x86_64: [Errno 256] No more mirrors to try.
4:containerd.io-1.2.13-3.2.el7.x86_64: [Errno 256] No more mirrors to try.
#解决方法:
# step 1: 安装必要的一些系统工具
yum install -y yum-utils device-mapper-persistent-data lvm2
# Step 2: 添加软件源信息
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Step 3: 更新并安装 Docker-CE
yum makecache fast
yum -y install docker-ce
# Step 4: 开启Docker服务
service docker start
####错误2
测试docker报错:
[root@zhuguofu ~]# docker run hello-world
Unable to find image 'hello-world:latest' locally
docker: Error response from daemon: Get https://registry-1.docker.io/v2/: x509:
certificate signed by unknown authority.
See 'docker run --help'.
#解决方法
#进入/etc/docker
#查看有没有 daemon.json。这是docker默认的配置文件。
#如果没有新建,如果有,则修改。
cd /etc/docker
vi daemon.json
{"selinux-enabled": false,"debug": true,"registry-mirrors":
["https://5kt5ut46.mirror.aliyuncs.com","https://registry.dockercn.com"],"insecure-registries": ["https://5kt5ut46.mirror.aliyuncs.com"]}
#重启docker服务
service docker restart
17.4升级py3.6.6
#1.下载
#2.更换系统yum源(否则很难安装依赖)
cd /etc/yum.repos.d/
wget http://mirrors.aliyun.com/repo/Centos-7.repo
#3.安装依赖(注:需要有自带的python)
yum -y install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel
#4. move aside (back up) the python 2.7.5 binary that ships with CentOS 7
mv /usr/bin/python /usr/bin/python_bak
#5.解压源码
tar -zxvf /home/Python-3.6.6.tgz -C /home/
#6.编译安装
cd /home/Python-3.6.6
# configure with a custom install prefix; if the prefix's bin directory is not on PATH, add it to PATH
./configure --prefix=/usr/local
#这个过程会非常慢
make && make install
#7.建立软连接(不建立就在指定位置找即可)
ln -s /usr/local/bin/python3 /usr/bin/python3
ln -s /usr/local/bin/pip3.6 /usr/bin/pip3
#8. check that the new version is available
python3 -V
--- if presto's launcher will not start after the upgrade, adjust the python symlinks as below; note that the supervisor installed via yum is a python 2.x package
ln -s /usr/local/bin/python3 /usr/bin/python
ln -s /usr/local/bin/pip3.6 /usr/bin/pip
18.安装Flink
18.1单机安装
##1.解压安装并配置环境变量
tar -zxvf flink-1.9.1-bin-scala_2.11.tgz -C /opt/apps
vi /etc/profile
...
source /etc/profile
## 2. Start and test
start-cluster.sh
# stop: stop-cluster.sh
## 3. Check the Web UI
localhost:8081
18.2Standalone Cluster安装
18.2.1上传解压安装并配置环境变量
tar -zxvf flink-1.9.1-bin-scala_2.11.tgz -C /opt/apps
vi /etc/profile
export FLINK_HOME=/opt/apps/flink-1.9.1
export PATH=$PATH:$FLINK_HOME/bin
source /etc/profile
18.2.2修改conf/flink-conf.yaml文件
# JobManager runs.
jobmanager.rpc.address: hadoop1
# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline.
taskmanager.numberOfTaskSlots: 2
# The port to which the REST client connects to. If rest.bind-port has
# not been specified, then the server will bind to this port as well.
rest.port: 8081
# The address to which the REST client will connect to
rest.address: hadoop1
18.2.3修改conf/masters
hadoop1:8081
18.2.4修改conf/slaves
hadoop1
hadoop2
hadoop3
18.2.5分发并同步环境变量
scp -r /opt/apps/flink-1.9.1 hadoop2:/opt/apps/
scp -r /opt/apps/flink-1.9.1 hadoop3:/opt/apps/
scp /etc/profile hadoop2:/etc/profile
scp /etc/profile hadoop3:/etc/profile
source /etc/profile
18.2.6启动测试
start-cluster.sh
#WebUI查看
hadoop1:8081
#关闭
stop-cluster.sh
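To verify the standalone cluster actually executes jobs, the example jar shipped with the Flink distribution can be submitted (a quick sketch; the finished job then shows up under Completed Jobs in the Web UI):
flink run $FLINK_HOME/examples/streaming/WordCount.jar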
18.3Standalone Cluster HA 安装
1.解压安装
tar -zxvf flink-1.9.1-bin-scala_2.11.tgz -C /opt/apps
vi /etc/profile
...
source /etc/profile
2.修改配置:flink-conf.yaml
# JobManager runs.
jobmanager.rpc.address: hadoop1
# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline.
taskmanager.numberOfTaskSlots: 2
# The port to which the REST client connects to. If rest.bind-port has
# not been specified, then the server will bind to this port as well.
rest.port: 8081
# The address to which the REST client will connect to
rest.address: hadoop1
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
high-availability: zookeeper
high-availability.zookeeper.quorum: hadoop1:2181,hadoop2:2181,hadoop3:2181
high-availability.zookeeper.path.root: /flink
high-availability.cluster-id: /cluster_flink
high-availability.storageDir: hdfs://hadoop1:9000/flink/recovery
3.修改配置:masters
hadoop1:8081
hadoop2:8081
4.拷贝hdfs的依赖包
cp /opt/software/flink-shaded-hadoop-2-uber-2.7.5-10.0.jar /opt/apps/flink-1.9.1/lib/
5.分发同步
scp -r /opt/apps/flink-1.9.1 hadoop2:/opt/apps/
scp -r /opt/apps/flink-1.9.1 hadoop3:/opt/apps/
scp /etc/profile hadoop2:/etc/profile
scp /etc/profile hadoop3:/etc/profile
source /etc/profile
6. Start the cluster
# start order: ZooKeeper and HDFS first, then Flink
start-cluster.sh
18.4Yarn Cluster HA安装
# standalone cluster HA settings
#high-availability: zookeeper
#high-availability.zookeeper.quorum: hadoop1:2181,hadoop2:2181,hadoop3:2181
#high-availability.zookeeper.path.root: /flink_cluster
#high-availability.cluster-id: /flink_cluster
#high-availability.storageDir: hdfs://hadoop1:9000/flink_cluster/recovery
#flink yarn HA settings
high-availability: zookeeper
high-availability.zookeeper.quorum: hadoop1:2181,hadoop2:2181,hadoop3:2181
high-availability.zookeeper.path.root: /flink_yarn
high-availability.cluster-id: /flink_yarn
high-availability.storageDir: hdfs://hadoop1:9000/flink_yarn/recovery
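With the YARN HA settings above in place, jobs run on YARN instead of the standalone cluster; a minimal sketch of both modes (the memory sizes and slot counts are arbitrary examples, and HADOOP_CONF_DIR must point at the Hadoop configs):
# session mode: start a long-running Flink session on YARN (detached)
yarn-session.sh -jm 1024m -tm 2048m -s 2 -d
# per-job mode: submit a single job directly to YARN
flink run -m yarn-cluster -yjm 1024m -ytm 2048m $FLINK_HOME/examples/streaming/WordCount.jar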
18.5job historyserver配置
# The HistoryServer is started and stopped via bin/historyserver.sh (start|stop)
jobmanager.archive.fs.dir: hdfs://hadoop1:9000/flink_completed_jobs/
historyserver.web.address: 192.168.10.201
historyserver.web.port: 8082
historyserver.archive.fs.dir: hdfs://hadoop1:9000/flink_completed_jobs/
historyserver.archive.fs.refresh-interval: 10000
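The history server itself is started with the script mentioned in the comment above; a short sketch (the HDFS archive directory should exist before completed jobs are archived):
hdfs dfs -mkdir -p /flink_completed_jobs
historyserver.sh start
# Web UI: http://192.168.10.201:8082
historyserver.sh stop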