init-hive.sh
#!/bin/bash
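#
# init-hive.sh renders Hive configuration from environment variables and then
# initializes the metastore schema. An illustrative invocation (the values are
# examples, not defaults baked into the script; the variable names are the
# ones this script actually reads):
#
#   CONNECTION_URL='jdbc:mysql://mysql:3306/hive_metastore' \
#   CONNECTION_USER_NAME=hive CONNECTION_PASSWORD=hive \
#   WAREHOUSE_DIR='s3a://example-bucket/warehouse' \
#   ./init-hive.sh
#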
echo "Init Hive"
# Defaults; any of these can be overridden from the environment.
CONNECTION_DRIVER_NAME=${CONNECTION_DRIVER_NAME:-com.mysql.jdbc.Driver}
HIVE_SERVER_PORT=${HIVE_SERVER_PORT:-10000}
SCHEMA_VERIFICATION=${SCHEMA_VERIFICATION:-false}
METASTORE_PORT=${METASTORE_PORT:-9083}
DEFAULT_FS=${DEFAULT_FS:-file:///}
DB_TYPE=${DB_TYPE:-mysql}
USE_ATLAS=${USE_ATLAS:-false}
# Default to false so the feature test below is well-defined when unset.
USE_KAFKA_EVENT_LISTENER=${USE_KAFKA_EVENT_LISTENER:-false}
HIVE_AUTH=${HIVE_AUTH:-NONE}
KAFKA_LISTENER_TOPIC=${KAFKA_LISTENER_TOPIC:-hive_metastore_listener_events}
MAX_WORKER_THREADS=${MAX_WORKER_THREADS:-2000}
export HADOOP_CLIENT_OPTS="${HADOOP_CLIENT_OPTS:--XX:-UseGCOverheadLimit -Xmx20480m}"
if [ -n "${JSON_LOG}" ]; then
  echo "Setting log type to JSON"
  cat log4j2.json.properties >> "${HIVE_HOME}/conf/hive-log4j2.properties"
fi
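# Render hive-site.xml from the environment. The heredoc delimiter is
# deliberately unquoted, so every ${...} below is expanded now, at
# generation time.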
cat > "${HIVE_HOME}/conf/hive-site.xml" <<EOL
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>${CONNECTION_URL}</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>${CONNECTION_DRIVER_NAME}</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>${CONNECTION_USER_NAME}</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>${CONNECTION_PASSWORD}</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>${SCHEMA_VERIFICATION}</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>${WAREHOUSE_DIR}</value>
    <description>location of default database for the warehouse</description>
  </property>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://${METASTORE_URI:-localhost}:${METASTORE_PORT:-9083}</value>
  </property>
  <property>
    <name>hive.server2.thrift.port</name>
    <value>${HIVE_SERVER_PORT}</value>
  </property>
  <property>
    <name>hive.server2.authentication</name>
    <value>${HIVE_AUTH}</value>
  </property>
  <property>
    <!-- fs.default.name is deprecated but still honored; fs.defaultFS is the modern key -->
    <name>fs.default.name</name>
    <value>${DEFAULT_FS}</value>
  </property>
  <property>
    <name>fs.s3a.impl</name>
    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
  </property>
  <property>
    <name>fs.s3.impl</name>
    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
  </property>
  <property>
    <name>fs.s3n.awsAccessKeyId</name>
    <value>${AWS_ACCESS_KEY}</value>
  </property>
  <property>
    <name>fs.s3n.awsSecretAccessKey</name>
    <value>${AWS_SECRET_KEY}</value>
  </property>
  <property>
    <name>hive.security.authorization.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.metastore.disallow.incompatible.col.type.changes</name>
    <value>false</value>
    <description>If true (default is false), ALTER TABLE operations that change the type of a column (say STRING) to an incompatible type (say MAP&lt;STRING, STRING&gt;) are disallowed. RCFile's default SerDe (ColumnarSerDe) serializes values in such a way that datatypes can be converted from string to any type. The map is also serialized as a string, which can be read back as a string as well. However, with any binary serialization this is not true. Blocking the ALTER TABLE prevents ClassCastExceptions when subsequently accessing old partitions. Primitive types like INT, STRING, BIGINT, etc. are compatible with each other and are not blocked. See HIVE-4409 for more details.</description>
  </property>
  <property>
    <name>hive.async.log.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.server2.thrift.max.worker.threads</name>
    <value>${MAX_WORKER_THREADS}</value>
  </property>
  <property>
    <name>hive.metastore.event.db.notification.api.auth</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.metastore.server.max.threads</name>
    <value>${MAX_WORKER_THREADS}</value>
  </property>
  <property>
    <name>hive.metastore.metrics.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.metrics.enabled</name>
    <value>true</value>
  </property>
EOL
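# Optionally register the Apiary Kafka metastore event listener. This only
# configures the listener; it assumes the Apiary events jar is already on
# the metastore classpath.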
if [ "${USE_KAFKA_EVENT_LISTENER}" == 'true' ]; then
  cat >> "${HIVE_HOME}/conf/hive-site.xml" <<EOL
  <property>
    <name>hive.metastore.event.listeners</name>
    <value>com.expediagroup.apiary.extensions.events.metastore.kafka.listener.KafkaMetaStoreEventListener</value>
  </property>
  <property>
    <name>com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.topic.name</name>
    <value>${KAFKA_LISTENER_TOPIC}</value>
  </property>
  <property>
    <name>com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.bootstrap.servers</name>
    <value>${BOOTSTRAP_SERVERS}</value>
  </property>
  <property>
    <name>com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.client.id</name>
    <value>HIVE_CLIENT</value>
  </property>
EOL
fi
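# Optionally enable the Apache Atlas Hive hook. ATLAS_HOME, ZOOKEEPER_CONNECT,
# BOOTSTRAP_SERVERS and ATLAS_ADDR are expected from the environment when
# USE_ATLAS=true.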
if [ "${USE_ATLAS}" == 'true' ]; then
  echo "USING ATLAS"
  cat >> "${HIVE_HOME}/conf/hive-site.xml" <<EOL
  <property>
    <name>hive.exec.post.hooks</name>
    <value>org.apache.atlas.hive.hook.HiveHook</value>
  </property>
EOL
  # hive-env extra jars
  cat >> "${HIVE_HOME}/conf/hive-env.sh" <<EOL
export HIVE_AUX_JARS_PATH=${ATLAS_HOME}/hook/hive
EOL
  # Atlas application properties. \${sys:atlas.home} is escaped so it reaches
  # the properties file literally, for Atlas itself to resolve at runtime
  # (unescaped, bash would choke on it as a substring expansion).
  cat > "${HIVE_HOME}/conf/atlas-application.properties" <<EOL
atlas.hook.hive.synchronous=true
atlas.hook.hive.numRetries=3
atlas.hook.hive.queueSize=10000
atlas.cluster.name=primary
atlas.kafka.zookeeper.connection.timeout.ms=30000
atlas.kafka.zookeeper.session.timeout.ms=60000
atlas.kafka.zookeeper.sync.time.ms=20
atlas.kafka.data=\${sys:atlas.home}/data/kafka
atlas.kafka.zookeeper.connect=${ZOOKEEPER_CONNECT}
atlas.kafka.bootstrap.servers=${BOOTSTRAP_SERVERS}
atlas.rest.address=${ATLAS_ADDR}
EOL
fi
cat >> "${HIVE_HOME}/conf/hive-site.xml" <<EOL
</configuration>
EOL
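# Initialize the metastore schema. schematool -initSchema fails if the schema
# already exists; the exit status is not checked here, so a rerun against an
# already-initialized database just logs the error.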
"${HIVE_HOME}/bin/schematool" -dbType "${DB_TYPE}" -initSchema
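# A hypothetical smoke test once HiveServer2 is up, using the default port
# configured above:
#   beeline -u jdbc:hive2://localhost:10000 -e 'SHOW DATABASES;'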