-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdocker-compose.lakehouse.yaml
91 lines (86 loc) · 2.72 KB
/
docker-compose.lakehouse.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Local lakehouse stack: Trino -> Hive Metastore (metadata in PostgreSQL) -> MinIO.
# All credentials below are development defaults; do not reuse them elsewhere.
version: '3.8'

services:
  # Distributed query engine for various types of data sources.
  trino:
    container_name: lakehouse-trino
    image: 'trinodb/trino:410'
    ports:
      - '8085:8080'  # host 8085 -> container 8080
    volumes:
      - ./trino/etc:/usr/lib/trino/etc:ro
      - ./trino/catalog:/etc/trino/catalog
    depends_on:
      - hive-metastore

  # PostgreSQL database that stores the Hive metadata, i.e. how the data files
  # are mapped to schemas and tables.
  metastore_db:
    container_name: lakehouse-metastore-db
    image: 'postgres:13'
    ports:
      - '5433:5432'  # PostgreSQL protocol; host 5433 -> container 5432
    environment:
      POSTGRES_USER: hive
      POSTGRES_PASSWORD: hive
      POSTGRES_DB: metastore

  # Second, independent PostgreSQL instance (user/database `k6`).
  offline-fs:
    container_name: lakehouse-offline-fs
    image: 'postgres:13'
    ports:
      - '5434:5432'  # PostgreSQL protocol; host 5434 -> container 5432
    environment:
      POSTGRES_USER: k6
      POSTGRES_PASSWORD: k6
      POSTGRES_DB: k6

  # Hive Metastore service: a repository of metadata about the tables, such as
  # database names, table names, schemas and the data location of each table.
  hive-metastore:
    container_name: lakehouse-hive-metastore
    image: 'starburstdata/hive:3.1.2-e.18'
    ports:
      - '9083:9083'  # Hive Metastore Thrift port
    environment:
      HIVE_METASTORE_DRIVER: org.postgresql.Driver
      # Host name and database must match the `metastore_db` service above.
      HIVE_METASTORE_JDBC_URL: jdbc:postgresql://metastore_db:5432/metastore
      HIVE_METASTORE_USER: hive
      HIVE_METASTORE_PASSWORD: hive
      # HIVE_METASTORE_WAREHOUSE_DIR: s3://datalake/  # HDFS config, we don't need it
      # HIVE_METASTORE_USERS_IN_ADMIN_ROLE: "admin"  # We also don't need it
      # S3 settings point at the `minio` service; the keys must match its
      # root credentials.
      S3_ENDPOINT: http://minio:9000
      S3_ACCESS_KEY: minio_access_key
      S3_SECRET_KEY: minio_secret_key
      S3_PATH_STYLE_ACCESS: "true"
      # The variables below are unused in this setup but are kept defined
      # (as empty strings) because the container does not work without them.
      # NOTE(review): presumably the image's startup script requires every
      # variable to be set - confirm before removing any of them.
      REGION: ""
      GOOGLE_CLOUD_KEY_FILE_PATH: ""
      AZURE_ADL_CLIENT_ID: ""
      AZURE_ADL_CREDENTIAL: ""
      AZURE_ADL_REFRESH_URL: ""
      AZURE_ABFS_STORAGE_ACCOUNT: ""
      AZURE_ABFS_ACCESS_KEY: ""
      AZURE_WASB_STORAGE_ACCOUNT: ""
      AZURE_ABFS_OAUTH: ""
      AZURE_ABFS_OAUTH_TOKEN_PROVIDER: ""
      AZURE_ABFS_OAUTH_CLIENT_ID: ""
      AZURE_ABFS_OAUTH_SECRET: ""
      AZURE_ABFS_OAUTH_ENDPOINT: ""
      AZURE_WASB_ACCESS_KEY: ""
    depends_on:
      - metastore_db
      # S3_ENDPOINT above targets this service, so start it first as well.
      - minio

  # MinIO object storage holding the Parquet files.
  minio:
    container_name: lakehouse-minio
    image: 'minio/minio:RELEASE.2025-01-20T14-49-07Z-cpuv1'
    ports:
      - '9000:9000'  # S3 API (used by S3_ENDPOINT in hive-metastore)
      - '9001:9001'  # web console, see --console-address below
    volumes:
      - minio_storage:/data
    environment:
      # MINIO_ACCESS_KEY / MINIO_SECRET_KEY are deprecated; the root
      # credentials are the supported replacement. Values are unchanged.
      MINIO_ROOT_USER: minio_access_key
      MINIO_ROOT_PASSWORD: minio_secret_key
      # NOTE(review): this looks like a Bitnami-image variable; the official
      # minio/minio image may ignore it, in which case `my-bucket` has to be
      # created manually (console or `mc mb`) - confirm.
      MINIO_DEFAULT_BUCKETS: "my-bucket"
    command: server --console-address ":9001" /data

volumes:
  # Named volume that persists MinIO's /data directory across restarts.
  minio_storage: