# docker-compose.yaml
# NOTE(review): Docker Compose v2 treats the top-level `version` key as
# obsolete and ignores it; it is kept here for older tooling — confirm whether
# it is still needed.
version: '3'

# This docker-compose is for developer convenience, not for running in production.

services:

  # Spark master, built from the local Dockerfile. The master web UI is
  # published on host port 8090 (matches SPARK_MASTER_WEBUI_PORT).
  spark-master:
    build:
      context: .
      dockerfile: Dockerfile
    platform: linux/amd64
    ports:
      - "8090:8090"
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_WEBUI_PORT=8090
      - MAX_EXECUTORS=4
      - EXECUTOR_CORES=2
      - MAX_CORES_PER_APPLICATION=10
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
      - POSTGRES_URL=postgres:5432
      - DATANUCLEUS_AUTO_CREATE_TABLES=true
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - DELTALAKE_WAREHOUSE_DIR=s3a://cdm-lake/warehouse
      - SPARK_JOB_LOG_DIR=s3a://cdm-spark-job-logs/spark-job-logs
      - SPARK_JOB_LOG_DIR_CATEGORY=master
      - MINIO_URL=http://minio:9002
      - MINIO_LOG_USER_ACCESS_KEY=minio-log-access
      - MINIO_LOG_USER_SECRET_KEY=minio123

  # First Spark worker. Registers with the master at spark-master:7077 and
  # publishes its web UI on host port 8081.
  spark-worker-1:
    build:
      context: .
      dockerfile: Dockerfile
    platform: linux/amd64
    depends_on:
      - spark-master
    ports:
      - "8081:8081"
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_CORES=2
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_WEBUI_PORT=8081
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
      - POSTGRES_URL=postgres:5432
      - DATANUCLEUS_AUTO_CREATE_TABLES=true
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - DELTALAKE_WAREHOUSE_DIR=s3a://cdm-lake/warehouse
      - SPARK_JOB_LOG_DIR=s3a://cdm-spark-job-logs/spark-job-logs
      - SPARK_JOB_LOG_DIR_CATEGORY=worker-1
      - MINIO_URL=http://minio:9002
      - MINIO_LOG_USER_ACCESS_KEY=minio-log-access
      - MINIO_LOG_USER_SECRET_KEY=minio123

  # Second Spark worker. Same configuration as spark-worker-1 except for the
  # web UI port (8082) and the log category (worker-2).
  spark-worker-2:
    build:
      context: .
      dockerfile: Dockerfile
    platform: linux/amd64
    depends_on:
      - spark-master
    ports:
      - "8082:8082"
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_CORES=2
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_WEBUI_PORT=8082
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
      - POSTGRES_URL=postgres:5432
      - DATANUCLEUS_AUTO_CREATE_TABLES=true
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - DELTALAKE_WAREHOUSE_DIR=s3a://cdm-lake/warehouse
      - SPARK_JOB_LOG_DIR=s3a://cdm-spark-job-logs/spark-job-logs
      - SPARK_JOB_LOG_DIR_CATEGORY=worker-2
      - MINIO_URL=http://minio:9002
      - MINIO_LOG_USER_ACCESS_KEY=minio-log-access
      - MINIO_LOG_USER_SECRET_KEY=minio123

  # Container built from the same image that only idles (`tail -f /dev/null`),
  # with redis_container_script.py mounted at /app. No ports are published.
  # NOTE(review): unlike the master and workers, MINIO_LOG_USER_ACCESS_KEY is
  # `minio-readwrite` here rather than `minio-log-access` — confirm intended.
  spark-user:
    build:
      context: .
      dockerfile: Dockerfile
    platform: linux/amd64
    environment:
      - SPARK_MASTER_URL=spark://spark-master:7077
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - SPARK_JOB_LOG_DIR=s3a://cdm-spark-job-logs/spark-job-logs
      - SPARK_JOB_LOG_DIR_CATEGORY=jupyter-user1
      - MINIO_URL=http://minio:9002
      - MINIO_LOG_USER_ACCESS_KEY=minio-readwrite
      - MINIO_LOG_USER_SECRET_KEY=minio123
    command: /bin/bash -c "tail -f /dev/null"
    volumes:
      - ./scripts/redis_container_script.py:/app/redis_container_script.py
    depends_on:
      - spark-master

  # PostgreSQL database `hive` (user `hive`), reachable by the Spark services
  # as postgres:5432 and published on host port 5432.
  postgres:
    image: postgres:16.3
    # To avoid incorrect user permissions, manually create the volume directory before running Docker.
    # export UID=$(id -u)
    # export GID=$(id -g)
    # mkdir -p cdr/cdm/jupyter/cdm-postgres
    # reference: https://forums.docker.com/t/systemd-coredump-taking-ownership-of-tmp-db-directory-and-contents-in-rails-app/93609
    user: "${UID}:${GID}"
    ports:
      - "5432:5432"
    environment:
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
    volumes:
      # For local development only. In Rancher development, PostgreSQL data
      # shouldn't be stored in a shared mount.
      - ./cdr/cdm/jupyter/cdm-postgres:/var/lib/postgresql/data

  # Redis with append-only-file persistence enabled; /data is bind-mounted to
  # ./cdr/cdm/jupyter/cdm-redis on the host.
  redis:
    image: redis:8.0.1
    ports:
      - "6379:6379"
    volumes:
      - ./cdr/cdm/jupyter/cdm-redis:/data
    command: redis-server --appendonly yes

  # MinIO object store. The S3 API listens on 9002 and the console on 9003
  # (see `command`); both are published on the same host ports.
  minio:
    image: minio/minio
    ports:
      - "9002:9002"
      # MinIO Console is available at http://localhost:9003
      - "9003:9003"
    environment:
      MINIO_ROOT_USER: minio
      MINIO_ROOT_PASSWORD: minio123
    healthcheck:
      # Healthy once a TCP connection to the API port (9002) can be opened.
      # reference: https://github.com/rodrigobdz/docker-compose-healthchecks?tab=readme-ov-file#minio-release2023-11-01t18-37-25z-and-older
      test: "timeout 5s bash -c ':> /dev/tcp/127.0.0.1/9002' || exit 1"
      interval: 1s
      timeout: 10s
      retries: 5
    # Note there is no bucket by default
    command: server --address 0.0.0.0:9002 --console-address 0.0.0.0:9003 /data

  # One-shot MinIO client container: waits until `minio` reports healthy, then
  # runs the mounted entrypoint script with the two policy JSON files mounted
  # under /config.
  minio-create-bucket:
    image: minio/mc
    depends_on:
      minio:
        condition: service_healthy
    entrypoint: /scripts/minio_create_bucket_entrypoint.sh
    volumes:
      - ./config/cdm-read-write-policy.json:/config/cdm-read-write-policy.json
      - ./config/cdm-spark-job-logs-policy.json:/config/cdm-spark-job-logs-policy.json
      - ./scripts/minio_create_bucket_entrypoint.sh:/scripts/minio_create_bucket_entrypoint.sh