On this page
Command Line
本页描述了 Hive 支持的不同 Client 端。命令行 Client 端当前仅支持嵌入式服务器。 JDBC 和 Thrift-JavaClient 端支持嵌入式服务器和独立服务器。其他语言的 Client 端仅支持独立服务器。
有关独立服务器的详细信息,请参见Hive Server或HiveServer2。
仅在嵌入式模式下运行,即它需要访问 Hive 库。有关更多详细信息,请参见Getting Started和Hive CLI。
JDBC
本文档描述了原始 Hive Server(有时称为 Thrift 服务器或 HiveServer1)的 JDBC 客户端。有关 HiveServer2 JDBC 客户端的信息,请参见 HiveServer2 客户端文档中的 JDBC。推荐使用 HiveServer2;原始的 HiveServer 存在多个并发问题,并且缺少 HiveServer2 中提供的一些功能。
Version information
最初的Hive Server已从version 1.0.0开始的 Hive 发行版中删除。参见HIVE-6977。
对于嵌入式模式,URI 只是 "jdbc:hive://"。对于独立服务器,URI 是 "jdbc:hive://host:port/dbname",其中 host 和 port 由 Hive 服务器的运行位置确定。例如 "jdbc:hive://localhost:10000/default"。当前,唯一支持的 dbname 是 "default"。
JDBCClient 端示例代码
import java.sql.SQLException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.sql.DriverManager;
public class HiveJdbcClient {
  // JDBC driver class for the original HiveServer1 (Thrift) server.
  private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

  /**
   * Demonstrates basic operations against a standalone HiveServer1 via JDBC:
   * drop/create a table, inspect it, load a local data file, then run
   * SELECT and aggregate queries.
   *
   * <p>Fix over the original sample: the {@link Connection} and
   * {@link Statement} are now closed in {@code finally} blocks so the
   * client does not leak server-side resources on any exit path.
   *
   * @param args unused
   * @throws SQLException if any JDBC operation fails
   */
  public static void main(String[] args) throws SQLException {
    try {
      // Register the Hive driver with DriverManager before connecting.
      Class.forName(driverName);
    } catch (ClassNotFoundException e) {
      // The Hive JDBC jar is missing from the classpath; nothing more to do.
      e.printStackTrace();
      System.exit(1);
    }
    Connection con = DriverManager.getConnection("jdbc:hive://localhost:10000/default", "", "");
    try {
      Statement stmt = con.createStatement();
      try {
        String tableName = "testHiveDriverTable";
        // NOTE(review): the old HiveServer1 driver routes DDL/DML through
        // executeQuery as in the original sample; kept as-is.
        stmt.executeQuery("drop table " + tableName);
        ResultSet res = stmt.executeQuery("create table " + tableName + " (key int, value string)");
        // show tables
        String sql = "show tables '" + tableName + "'";
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        if (res.next()) {
          System.out.println(res.getString(1));
        }
        // describe table
        sql = "describe " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
          System.out.println(res.getString(1) + "\t" + res.getString(2));
        }
        // load data into table
        // NOTE: filepath has to be local to the hive server
        // NOTE: /tmp/a.txt is a ctrl-A separated file with two fields per line
        String filepath = "/tmp/a.txt";
        sql = "load data local inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        // select * query
        sql = "select * from " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
          System.out.println(String.valueOf(res.getInt(1)) + "\t" + res.getString(2));
        }
        // regular hive query
        sql = "select count(1) from " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
          System.out.println(res.getString(1));
        }
      } finally {
        stmt.close(); // closing the Statement also closes its open ResultSet
      }
    } finally {
      con.close();
    }
  }
}
运行 JDBC 示例代码
# Then on the command-line
$ javac HiveJdbcClient.java
# To run the program in standalone mode, we need the following jars in the classpath
# from hive/build/dist/lib
# hive_exec.jar
# hive_jdbc.jar
# hive_metastore.jar
# hive_service.jar
# libfb303.jar
# log4j-1.2.15.jar
#
# from hadoop/build
# hadoop-*-core.jar
#
# To run the program in embedded mode, we need the following additional jars in the classpath
# from hive/build/dist/lib
# antlr-runtime-3.0.1.jar
# derby.jar
# jdo2-api-2.1.jar
# jpox-core-1.2.2.jar
# jpox-rdbms-1.2.2.jar
#
# as well as hive/build/dist/conf
$ java -cp $CLASSPATH HiveJdbcClient
# Alternatively, you can run the following bash script, which will seed the data file
# and build your classpath before invoking the client.
#!/bin/bash
# Seed the sample data file (fields separated by ctrl-A, \x01) and build
# the classpath before invoking the JDBC client.
HADOOP_HOME=/your/path/to/hadoop
HIVE_HOME=/your/path/to/hive

echo -e '1\x01foo' > /tmp/a.txt
echo -e '2\x01bar' >> /tmp/a.txt

# Locate the Hadoop core jar. The original line used wiki markup
# {{ls ...}}, which is not valid shell; use command substitution instead.
HADOOP_CORE=$(ls $HADOOP_HOME/hadoop-*-core.jar)
CLASSPATH=.:$HADOOP_CORE:$HIVE_HOME/conf

# Add every Hive library jar to the classpath.
for i in ${HIVE_HOME}/lib/*.jar ; do
    CLASSPATH=$CLASSPATH:$i
done

java -cp $CLASSPATH HiveJdbcClient
安全集群的 JDBCClient 端设置
要在安全群集上配置 Hive,请将包含 hive-site.xml 的目录添加到 JDBCClient 端的 CLASSPATH。
Python
仅在独立服务器上运行。设置(并导出)PYTHONPATH 到 build/dist/lib/py。
以下代码中导入的 python 模块是通过构建 Hive 生成的。
请注意,生成的 python 模块名称在 hive trunk 中已更改。
#!/usr/bin/env python
import sys
from hive import ThriftHive
from hive.ttypes import HiveServerException
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
try:
transport = TSocket.TSocket('localhost', 10000)
transport = TTransport.TBufferedTransport(transport)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = ThriftHive.Client(protocol)
transport.open()
client.execute("CREATE TABLE r(a STRING, b INT, c DOUBLE)")
client.execute("LOAD TABLE LOCAL INPATH '/path' INTO TABLE r")
client.execute("SELECT * FROM r")
while (1):
row = client.fetchOne()
if (row == None):
break
print row
client.execute("SELECT * FROM r")
print client.fetchAll()
transport.close()
except Thrift.TException, tx:
print '%s' % (tx.message)
PHP
仅在独立服务器上运行。
<?php
// Point THRIFT_ROOT at the php directory of the Hive distribution.
$GLOBALS['THRIFT_ROOT'] = '/lib/php/';

// Pull in the generated Hive service code plus the Thrift transport
// and protocol implementations it depends on.
require_once $GLOBALS['THRIFT_ROOT'] . 'packages/hive_service/ThriftHive.php';
require_once $GLOBALS['THRIFT_ROOT'] . 'transport/TSocket.php';
require_once $GLOBALS['THRIFT_ROOT'] . 'protocol/TBinaryProtocol.php';

// Wire up socket -> binary protocol -> Hive client, then connect.
$socket = new TSocket('localhost', 10000);
$binaryProtocol = new TBinaryProtocol($socket);
$hive = new ThriftHiveClient($binaryProtocol);
$socket->open();

// Issue a query (or metadata call) and dump the full result set.
$hive->execute('SELECT * from src');
var_dump($hive->fetchAll());

$socket->close();
ODBC
仅在独立服务器上运行。 Hive ODBCClient 端提供了一组与 C 兼容的库函数,以类似于 ODBC 规范规定的模式与 Hive Server 进行交互。参见Hive ODBC 驱动程序。
Thrift
Thrift JavaClient 端
在嵌入式模式和独立服务器上均可运行。
Thrift CClient
仅在独立服务器上运行。目前仍在开发中。
Thrift Node Client 端
Thrift NodeClient 端可在 github 上的https://github.com/wdavidw/node-thrift-hive和https://github.com/forward/node-hive上获得。
Thrift RubyClient 端
Thrift RubyClient 端可在 github 上的https://github.com/forward3d/rbhive获得。