Hive Cluster Deployment


1. Install the JDK

# wget --no-check-certificate --no-cookies --header "Cookie: oraclelicense=accept-securebackup-cookie" https://download.oracle.com/otn-pub/java/jdk/8u191-b12/2787e4a523244c269598db4e85c51e0c/jdk-8u191-linux-x64.tar.gz
# tar -zxf jdk-8u191-linux-x64.tar.gz && mv jdk1.8.0_191/ /usr/local/ && rm -rf jdk-8u191-linux-x64.tar.gz

2. Configure environment variables

# vim /etc/profile
export JAVA_HOME=/usr/local/jdk1.8.0_191
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# HIVE
export HIVE_HOME=/opt/hive
export HIVE_CONF_DIR=$HIVE_HOME/conf
export CLASSPATH=$CLASSPATH:$HIVE_HOME/lib
export PATH=$PATH:$HIVE_HOME/bin
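
After saving /etc/profile, reload it in the current shell and confirm the JDK is visible (a quick sanity check):

# source /etc/profile
# $JAVA_HOME/bin/java -version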

3. Install Hive

# wget http://apache.claz.org/hive/hive-2.3.4/apache-hive-2.3.4-bin.tar.gz
# tar zxf apache-hive-2.3.4-bin.tar.gz
# mv  apache-hive-2.3.4-bin /opt/hive

4. Configure hive-env.sh

# cp hive-env.sh.template hive-env.sh

# cat hive-env.sh|grep -Ev '^$|#'
JAVA_HOME=/usr/local/jdk1.8.0_191
HADOOP_HOME=/opt/hadoop
HIVE_HOME=/opt/hive
export HIVE_CONF_DIR=$HIVE_HOME/conf
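
The template copies in steps 4 through 6 are made inside Hive's configuration directory, so it is easiest to work from there (paths as installed above):

# cd /opt/hive/conf
# ls *.template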

5. Configure Hive logging

# mkdir -p /opt/hive/logs
# cp hive-log4j2.properties.template hive-log4j2.properties
# vim hive-log4j2.properties
property.hive.log.dir = /opt/hive/logs

6. Main configuration file: hive-site.xml

# cp hive-default.xml.template hive-site.xml

# cat hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--><configuration>
<property>
    <name>hive.metastore.uris</name>
    <value>thrift://172.16.158.126:9083</value>
</property>

<property>
    <name>datanucleus.autoCreateSchema</name>
    <value>false</value>
</property>

<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.99.101:3306/hive_pro?createDatabaseIfNotExist=true&amp;useSSL=false</value>
    <description>JDBC connect string for a JDBC metastore</description>
</property>

<property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
</property>

<property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>username to use against metastore database</description>
</property>

<property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>kbsonlong</value>
    <description>password to use against metastore database</description>
</property>


<!-- HiveServer2 impersonation.
     Symptom: when HiveServer2 is called through beeline, JDBC, or Python, HBase-backed
     tables cannot be queried or created. Setting hive.server2.enable.doAs to false makes
     HiveServer2 run operations as the user that started the HiveServer2 process rather
     than the calling user, which avoids this permission problem. -->

  <property>
        <name>hive.server2.enable.doAs</name>
        <value>false</value>
        <description>
      Setting this property to true will have HiveServer2 execute
      Hive operations as the user making the calls to it.
        </description>
  </property>

</configuration>
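
The connection settings above assume the hive_pro database and the root account are reachable from the Hive host. A hypothetical preparation on the MySQL server (MySQL 5.x syntax; names and password taken from the config above):

mysql> CREATE DATABASE IF NOT EXISTS hive_pro;
mysql> GRANT ALL PRIVILEGES ON hive_pro.* TO 'root'@'%' IDENTIFIED BY 'kbsonlong';
mysql> FLUSH PRIVILEGES;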

7. Initialize the Hive metastore schema

schematool -dbType mysql -initSchema
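
Before the schematool run above can reach MySQL, the connector jar has to be present in $HIVE_HOME/lib, since the Apache Hive tarball does not bundle the MySQL JDBC driver (the version below is only an example):

# wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.47/mysql-connector-java-5.1.47.jar
# cp mysql-connector-java-5.1.47.jar /opt/hive/lib/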

8. Start the Hive metastore

LOG_PATH below is the Hive log directory, e.g. the /opt/hive/logs created in step 5 (the startup script in the appendix sets it the same way).

nohup hive --service metastore >> ${LOG_PATH}/metastore_`date +%Y%m%d%H`.log 2>&1 &

9. Start HiveServer2

nohup hive --service hiveserver2 > hiveserver2_`date +%Y%m%d%H`.log 2>&1 &
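
Once both services are running, the metastore should be listening on port 9083 (as configured in hive.metastore.uris) and HiveServer2 on its Thrift port (10000 by default); a quick check:

# ss -lntp | grep -E '9083|10000'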

Integrating Hive with HBase

cp ${HBASE_HOME}/conf/hbase-site.xml ${HIVE_HOME}/conf/
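
If Hive later fails to load the HBase classes at query time, one common workaround is to expose the HBase client jars to Hive via HIVE_AUX_JARS_PATH in hive-env.sh; whether this is necessary depends on the Hive and HBase versions in use, so treat it only as a sketch:

# vim /opt/hive/conf/hive-env.sh
export HIVE_AUX_JARS_PATH=${HBASE_HOME}/lib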

Testing the Hive/HBase integration

Create an HBase-backed table in Hive

hive> CREATE TABLE hbase_table_1(key int, value string) 
    > STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val")
    > TBLPROPERTIES ("hbase.table.name" = "xyz", "hbase.mapred.output.outputtable" = "xyz");  
OK
Time taken: 1.199 seconds
hive> show tables
    > ;
OK
hbase_table_1

View the newly created table xyz in HBase

hbase(main):001:0> describe "xyz"
Table xyz is ENABLED                                                                                                                               
xyz                                                                                                                                                
COLUMN FAMILIES DESCRIPTION                                                                                                                        
{NAME => 'cf1', VERSIONS => '1', EVICT_BLOCKS_ON_CLOSE => 'false', NEW_VERSION_BEHAVIOR => 'false', KEEP_DELETED_CELLS => 'FALSE', CACHE_DATA_ON_WR
ITE => 'false', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', REPLICATION_SCOPE => '0', BLOOMFILTER => 'ROW', CACHE_INDEX_O
N_WRITE => 'false', IN_MEMORY => 'false', CACHE_BLOOMS_ON_WRITE => 'false', PREFETCH_BLOCKS_ON_OPEN => 'false', COMPRESSION => 'NONE', BLOCKCACHE =
> 'true', BLOCKSIZE => '65536'}                                                                                                                    
1 row(s)
Took 0.5187 seconds 
hbase(main):002:0> 
hbase(main):003:0* list "xyz"
TABLE                                                                                                                                              
xyz                                                                                                                                                
1 row(s)
Took 0.0307 seconds                                                                                                                                
=> ["xyz"]
hbase(main):004:0>        
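
To confirm the integration end to end, a row can be inserted through Hive and read back from the HBase shell (a sketch; the values are arbitrary):

hive> INSERT INTO TABLE hbase_table_1 VALUES (1, 'hello');
hive> SELECT * FROM hbase_table_1;
hbase(main):005:0> scan "xyz"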

HiveServer2 High Availability (HA)

1. Configure HiveServer2 HA

Starting with Hive 0.14, HiveServer2 high availability is implemented with ZooKeeper (ZooKeeper Service Discovery): instead of pointing at a specific host and port, clients connect through a namespace registered in ZooKeeper, which resolves to one of the live HiveServer2 instances.
(HiveServer2 HA architecture diagram)

Add the following configuration to hive-site.xml:

<!-- HiveServer2 HA: ZooKeeper quorum -->
<property>
    <name>hive.zookeeper.quorum</name>
    <value>master:2181,node01:2181,node02:2181</value>
</property>

<property>
    <name>hive.zookeeper.client.port</name>
    <value>2181</value>
</property>

<property>
    <name>hive.server2.zookeeper.namespace</name>
    <value>hiveserver2</value>
</property>

<property>
    <name>hive.server2.support.dynamic.service.discovery</name>
    <value>true</value>
</property>

<property>
    <name>hive.server2.thrift.bind.host</name>
    <value>0.0.0.0</value>
</property>

<property>
    <name>hive.server2.thrift.port</name>
    <value>10001</value>
</property>

Copy the whole /opt/hive directory to the other Hadoop nodes and start hiveserver2 on each of them.

scp -r /opt/hive 192.168.99.201:/opt/hive
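
The remote node needs the same /etc/profile entries and the MySQL driver jar as the first node; after that, HiveServer2 is started on it the same way as in step 9 (the host is the one from the scp command above):

# ssh 192.168.99.201
# nohup hive --service hiveserver2 > hiveserver2_`date +%Y%m%d%H`.log 2>&1 &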

2. JDBC connection


The JDBC connection URL has the form:

jdbc:hive2://<zookeeper quorum>/<dbName>;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2

where:

<zookeeper quorum> is the ZooKeeper connection string, e.g. zkNode1:2181,zkNode2:2181,zkNode3:2181
<dbName> is the Hive database to use; the default is default
serviceDiscoveryMode=zooKeeper selects ZooKeeper-based service discovery
zooKeeperNamespace=hiveserver2 is the namespace in ZooKeeper, i.e. the value of hive.server2.zookeeper.namespace (default hiveserver2)

3. Test hiveserver2

# beeline -u "jdbc:hive2://master:2181,node01:2181,node02:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2"
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/hive/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Connecting to jdbc:hive2://master:2181,node01:2181,node02:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
19/01/17 16:45:26 [main]: INFO jdbc.HiveConnection: Connected to 0.0.0.0:10000
Connected to: Apache Hive (version 2.3.4)
Driver: Hive JDBC (version 2.3.4)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 2.3.4 by Apache Hive
0: jdbc:hive2://master:2181,node01:2181,node0> show databases;
+----------------+
| database_name  |
+----------------+
| default        |
+----------------+
1 row selected (0.205 seconds)
0: jdbc:hive2://master:2181,node01:2181,node0> use default
. . . . . . . . . . . . . . . . . . . . . . .> ;
No rows affected (0.071 seconds)
0: jdbc:hive2://master:2181,node01:2181,node0> show tables;
+-----------+
| tab_name  |
+-----------+
| test_tb   |
+-----------+
1 row selected (0.074 seconds)
0: jdbc:hive2://master:2181,node01:2181,node0> -- create another HBase-backed table; the HBase table will be named test_tb01
0: jdbc:hive2://master:2181,node01:2181,node0> CREATE TABLE test_tb02(key int, value string)
. . . . . . . . . . . . . . . . . . . . . . .> STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
. . . . . . . . . . . . . . . . . . . . . . .> WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val")
. . . . . . . . . . . . . . . . . . . . . . .> -- hbase.table.name is the table name in HBase, which may differ from the Hive table name
. . . . . . . . . . . . . . . . . . . . . . .> -- hbase.mapred.output.outputtable is the HBase table that writes go to, normally the same value
. . . . . . . . . . . . . . . . . . . . . . .> TBLPROPERTIES ("hbase.table.name" = "test_tb01", "hbase.mapred.output.outputtable" = "test_tb01");
No rows affected (2.956 seconds)
0: jdbc:hive2://master:2181,node01:2181,node0> show tables;
+------------+
|  tab_name  |
+------------+
| test_tb    |
| test_tb02  |
+------------+
2 rows selected (0.065 seconds)
0: jdbc:hive2://master:2181,node01:2181,node0> show create table test_tb02;
+----------------------------------------------------+
|                   createtab_stmt                   |
+----------------------------------------------------+
| CREATE TABLE `test_tb02`(                          |
|   `key` int COMMENT '',                            |
|   `value` string COMMENT '')                       |
| ROW FORMAT SERDE                                   |
|   'org.apache.hadoop.hive.hbase.HBaseSerDe'        |
| STORED BY                                          |
|   'org.apache.hadoop.hive.hbase.HBaseStorageHandler'  |
| WITH SERDEPROPERTIES (                             |
|   'hbase.columns.mapping'=':key,cf1:val',          |
|   'serialization.format'='1')                      |
| TBLPROPERTIES (                                    |
|   'hbase.mapred.output.outputtable'='test_tb01',   |
|   'hbase.table.name'='test_tb01',                  |
|   'transient_lastDdlTime'='1547714784')            |
+----------------------------------------------------+
14 rows selected (0.348 seconds)
0: jdbc:hive2://master:2181,node01:2181,node0>

View the /hiveserver2 znode in ZooKeeper

[zk: localhost:2181(CONNECTED) 18] ls /hiveserver2
[serverUri=0.0.0.0:10001;version=2.3.4;sequence=0000000001, serverUri=0.0.0.0:10001;version=2.3.4;sequence=0000000000]
[zk: localhost:2181(CONNECTED) 19] 
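
The listing comes from the ZooKeeper command-line client, which can be opened against any quorum member, e.g.:

# zkCli.sh -server master:2181
[zk: master:2181(CONNECTED) 0] ls /hiveserver2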

Appendix: Hive startup script

#!/bin/bash

##Start hive

export HIVE_HOME=/opt/hive
export HIVE_CONF_DIR=$HIVE_HOME/conf
export CLASSPATH=$CLASSPATH:$HIVE_HOME/lib
export PATH=$PATH:$HIVE_HOME/bin
LOG_PATH=${HIVE_HOME}/logs

start_metastore () {

nohup hive --service metastore >> ${LOG_PATH}/metastore_`date +%Y%m%d%H`.log 2>&1 &
ps -ef | awk '/[m]etastore/{print $2}' >${LOG_PATH}/hive_metastore.pid

}

start_server2() {
nohup hive --service hiveserver2 > ${LOG_PATH}/hiveserver2_`date +%Y%m%d%H`.log 2>&1 &
ps -ef | awk '/[h]iveserver2/{print $2}' >${LOG_PATH}/hive_server2.pid
}


stop_metastore () {
    if [ -f ${LOG_PATH}/hive_metastore.pid ];then
        kill `cat ${LOG_PATH}/hive_metastore.pid`
        rm -rf ${LOG_PATH}/hive_metastore.pid
    else
        echo "metastore is stopped"
    fi

}

stop_server2() {
    if [ -f ${LOG_PATH}/hive_server2.pid ];then
        kill `cat ${LOG_PATH}/hive_server2.pid`
        rm -rf ${LOG_PATH}/hive_server2.pid
    else
        echo "metastore is stopped"
    fi
}

case $1 in 
        start_metastore)
                echo "start hive_metastore now"
                start_metastore
                exit 0
                ;;
        start_server2)
                echo "start hive_server2 now"
                start_server2
                exit 0
                ;;
        stop_metastore)
                echo "stop hive_metastore now"
                stop_metastore
                exit 0
                ;;
        stop_server2)
                echo "stop hive_server2 now"
                stop_server2
                exit 0
                ;;
        *)
                echo "start_hive.sh start_metastore|start_server2|stop_metastore|stop_server2"
                exit 1
                ;;
esac
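
Saved as start_hive.sh (the name shown in its usage message) and made executable, the script takes a single action argument:

# chmod +x start_hive.sh
# ./start_hive.sh start_metastore
# ./start_hive.sh start_server2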
