Skip to content

Commit 299a1dd

Browse files
committed
Update existing module and support creating eks and vpc modules out of the box
1 parent ee7093c commit 299a1dd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1231
-429
lines changed

.gitignore

+30-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,31 @@
1-
.terraform
1+
# Local .terraform directories
2+
**/.terraform/*
3+
4+
# .tfstate files
5+
*.tfstate
6+
*.tfstate.*
7+
8+
# Crash log files
9+
crash.log
10+
11+
# Ignore override files as they are usually used to override resources locally and should not be committed
12+
override.tf
13+
override.tf.json
14+
*_override.tf
15+
*_override.tf.json
16+
17+
# Ignore CLI configuration files
18+
.terraformrc
19+
terraform.rc
20+
21+
# Ignore sensitive variable files
22+
*.tfvars
23+
*.tfvars.json
24+
25+
# Ignore plan output files
26+
*.tfplan
27+
28+
# Ignore lock files
229
.terraform.lock.hcl
3-
terraform.tfstate
4-
terraform.tfstate.backup
30+
31+
*.metaflow*

.pre-commit-config.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
repos:
22
- repo: https://github.com/pre-commit/pre-commit-hooks
3-
rev: v2.3.0
3+
rev: v5.0.0
44
hooks:
55
- id: end-of-file-fixer
66
- id: trailing-whitespace
77
- repo: https://github.com/antonbabenko/pre-commit-terraform
8-
rev: v1.62.0 # Get the latest from: https://github.com/antonbabenko/pre-commit-terraform/releases
8+
rev: v1.96.3
99
hooks:
1010
- id: terraform_fmt
1111
- repo: https://github.com/terraform-docs/terraform-docs
12-
rev: "v0.15.0"
12+
rev: "v0.18.0"
1313
hooks:
1414
- id: terraform-docs-go
1515
name: "Main terraform module docs"

README.md

+41-15
Large diffs are not rendered by default.

aws_managed.tf

+112
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Metaflow metadata service, deployed from the local sub-module.
# The module is instantiated only when
# var.create_managed_metaflow_metadata_service is true (count = 1), otherwise
# no instance is created.
module "metaflow-metadata-service" {
  source = "./modules/metadata-service"
  count  = var.create_managed_metaflow_metadata_service ? 1 : 0

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Networking
  metaflow_vpc_id         = local.vpc_id
  subnet_ids              = local.subnet_ids
  vpc_cidr_blocks         = local.vpc_cidr_block
  access_list_cidr_blocks = var.access_list_cidr_blocks
  with_public_ip          = local.with_public_ip

  # Database
  database_name                = local.database_name
  database_username            = local.database_username
  database_password            = local.database_password
  rds_master_instance_endpoint = local.rds_master_instance_endpoint
  db_migrate_lambda_zip_file   = var.db_migrate_lambda_zip_file

  # Datastore
  s3_bucket_arn                   = local.s3_bucket_arn
  datastore_s3_bucket_kms_key_arn = local.datastore_s3_bucket_kms_key_arn

  # Service and IAM
  metadata_service_container_image = local.metadata_service_container_image
  enable_api_basic_auth            = var.metadata_service_enable_api_basic_auth
  enable_api_gateway               = var.metadata_service_enable_api_gateway
  iam_partition                    = var.iam_partition

  # NOTE(review): this indexes module.metaflow-computation[0] directly, and that
  # instance only exists when var.create_managed_compute is true. Confirm that
  # enabling the metadata service without managed compute is not a supported
  # combination, or guard this reference.
  fargate_execution_role_arn = module.metaflow-computation[0].ecs_execution_role_arn
}

# The module above now uses `count`, so its address gained an index. This tells
# Terraform to treat the previously un-indexed instance as instance [0].
moved {
  from = module.metaflow-metadata-service
  to   = module.metaflow-metadata-service[0]
}
34+
35+
# Metaflow UI, deployed from the local sub-module. Instantiated only when
# var.create_managed_metaflow_ui is true.
module "metaflow-ui" {
  source = "./modules/ui"
  count  = var.create_managed_metaflow_ui ? 1 : 0

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Networking / load balancer
  metaflow_vpc_id = local.vpc_id
  subnet_ids      = local.subnet_ids
  alb_subnet_ids  = local.alb_subnet_ids
  alb_internal    = !var.metaflow_ui_is_public # a public UI means a non-internal ALB
  ui_allow_list   = var.ui_allow_list
  certificate_arn = var.ui_certificate_arn

  # Database
  database_name                = local.database_name
  database_username            = local.database_username
  database_password            = local.database_password
  rds_master_instance_endpoint = local.rds_master_instance_endpoint

  # Datastore
  s3_bucket_arn                   = local.s3_bucket_arn
  datastore_s3_bucket_kms_key_arn = local.datastore_s3_bucket_kms_key_arn
  METAFLOW_DATASTORE_SYSROOT_S3   = local.METAFLOW_DATASTORE_SYSROOT_S3

  # Containers. The UI backend is given the same image local as the metadata service.
  ui_backend_container_image = local.metadata_service_container_image
  ui_static_container_image  = var.ui_static_container_image
  extra_ui_backend_env_vars  = var.extra_ui_backend_env_vars
  extra_ui_static_env_vars   = var.extra_ui_static_env_vars

  iam_partition = var.iam_partition

  # NOTE(review): the two references below index [0] of modules that are
  # themselves gated by `count` (var.create_managed_compute and
  # var.create_managed_metaflow_metadata_service). Confirm the UI is never
  # enabled without both, or guard these references.
  fargate_execution_role_arn         = module.metaflow-computation[0].ecs_execution_role_arn
  metadata_service_security_group_id = module.metaflow-metadata-service[0].metadata_service_security_group_id
}
66+
67+
# Managed compute for Metaflow, deployed from the local sub-module.
# Instantiated only when var.create_managed_compute is true.
module "metaflow-computation" {
  source = "./modules/computation"
  count  = var.create_managed_compute ? 1 : 0

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Networking
  metaflow_vpc_id = local.vpc_id
  subnet_ids      = local.subnet_ids

  # Compute environment
  batch_type                             = var.batch_type
  compute_environment_instance_types     = var.compute_environment_instance_types
  compute_environment_min_vcpus          = var.compute_environment_min_vcpus
  compute_environment_desired_vcpus      = var.compute_environment_desired_vcpus
  compute_environment_max_vcpus          = var.compute_environment_max_vcpus
  compute_environment_egress_cidr_blocks = var.compute_environment_egress_cidr_blocks

  # Launch template HTTP settings, passed straight through from the root variables
  launch_template_http_endpoint               = var.launch_template_http_endpoint
  launch_template_http_tokens                 = var.launch_template_http_tokens
  launch_template_http_put_response_hop_limit = var.launch_template_http_put_response_hop_limit

  iam_partition = var.iam_partition
}

# The module above now uses `count`, so its address gained an index. This tells
# Terraform to treat the previously un-indexed instance as instance [0].
moved {
  from = module.metaflow-computation
  to   = module.metaflow-computation[0]
}
94+
95+
moved {
96+
from = module.metaflow-step-function
97+
to = module.metaflow-step-function[0]
98+
}
99+
100+
# Step Functions integration for Metaflow, deployed from the local sub-module.
# Instantiated only when var.create_step_functions is true.
module "metaflow-step-functions" {
  source = "./modules/step-functions"
  count  = var.create_step_functions ? 1 : 0

  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  iam_partition   = var.iam_partition

  # Use the same locals the metadata-service and UI modules use for the
  # datastore bucket and its KMS key, rather than indexing
  # module.metaflow-datastore[0] directly, so every consumer resolves the
  # datastore the same way.
  s3_bucket_arn     = local.s3_bucket_arn
  s3_bucket_kms_arn = local.datastore_s3_bucket_kms_key_arn

  # NOTE(review): indexes module.metaflow-computation[0], which only exists
  # when var.create_managed_compute is true. Confirm step functions are never
  # enabled without managed compute, or guard this reference.
  batch_job_queue_arn = module.metaflow-computation[0].METAFLOW_BATCH_JOB_QUEUE

  standard_tags = var.tags
}

data.tf

-3
This file was deleted.

ecr.tf

-7
This file was deleted.

eks.tf

+168
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
# EKS cluster built with the community terraform-aws-modules/eks module.
# Instantiated only when var.create_eks_cluster is true.
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "20.31.6"
  count   = var.create_eks_cluster ? 1 : 0

  cluster_name    = local.eks_name
  cluster_version = "1.31" # Specify the desired EKS version
  vpc_id          = local.vpc_id
  subnet_ids      = local.subnet_ids
  enable_irsa     = true

  cluster_endpoint_public_access  = true
  cluster_endpoint_private_access = true

  # merge() lets later arguments win, so anything in var.node_group_defaults
  # overrides the built-in defaults below.
  eks_managed_node_group_defaults = merge({
    ami_type  = "AL2023_x86_64_STANDARD"
    disk_size = 50
  }, var.node_group_defaults)

  # A "metaflow_default" node group is always offered; var.node_groups can add
  # groups or replace this one by reusing the same key.
  eks_managed_node_groups = merge({
    metaflow_default = {
      # Version 20 of the EKS module reads `desired_size` and `instance_types`
      # (a list) for managed node groups. The older `desired_capacity` and
      # `instance_type` keys are not recognised and were silently ignored,
      # leaving the group on the module's own defaults.
      desired_size   = 2
      max_size       = 2
      min_size       = 1
      instance_types = ["m5.large"]
    }
  }, var.node_groups)

  # When the caller supplies no additional policies, fall back to the policies
  # defined in this file plus SSM access.
  # NOTE(review): at the top level of the v20 EKS module this input applies to
  # the cluster IAM role. The variable name suggests these were meant for the
  # node group role (set via `iam_role_additional_policies` inside the node
  # group defaults) - confirm the intended target.
  iam_role_additional_policies = length(var.node_group_iam_role_additional_policies) > 0 ? var.node_group_iam_role_additional_policies : {
    "default_node" = aws_iam_policy.default_node[0].arn,
    "autoscaler"   = aws_iam_policy.cluster_autoscaler[0].arn,
    # Allow SSM access to the machines in case direct access is needed
    "ssm" = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
  }

  tags = var.tags
}
37+
38+
# Default IAM policy used by the EKS module's fallback policy map.
# Created only when an EKS cluster is requested AND the caller did not supply
# their own additional policies.
resource "aws_iam_policy" "default_node" {
  count = var.create_eks_cluster && length(var.node_group_iam_role_additional_policies) == 0 ? 1 : 0

  # name_prefix (not name): AWS appends a unique suffix to the generated name.
  name_prefix = "${local.resource_prefix}-default-node-policy${local.resource_suffix}"
  description = "Default policy for cluster ${local.resource_prefix}-eks${local.resource_suffix}"

  policy = data.aws_iam_policy_document.default_node.json
}
45+
46+
# Policy document backing aws_iam_policy.default_node.
# NOTE(review): despite the "S3" sid, this statement allows every S3 *and*
# every KMS action on all resources. Consider scoping it to the Metaflow
# datastore bucket and its KMS key if that is all the nodes need.
data "aws_iam_policy_document" "default_node" {
  statement {
    sid       = "S3"
    effect    = "Allow"
    actions   = ["s3:*", "kms:*"]
    resources = ["*"]
  }
}
59+
60+
# Looks up the IAM role named by the second "/"-separated segment of the
# caller's ARN, so that role can be granted cluster-admin access below.
#
# Gated on var.create_eks_cluster: the lookup is only needed for the EKS
# access entry, and running it unconditionally made every plan depend on an
# IAM role lookup even when no cluster is being created.
#
# NOTE(review): this assumes the caller ARN contains the role name as its
# second "/" segment (e.g. ".../assumed-role/<role>/<session>"). For an ARN
# such as ".../user/<name>" it would look up a role called <name> - confirm
# which identities are expected to run this.
data "aws_iam_role" "current_role" {
  count = var.create_eks_cluster ? 1 : 0

  name = element(split("/", data.aws_caller_identity.current.arn), 1)
}

# Registers the role Terraform runs as on the cluster's access list.
resource "aws_eks_access_entry" "provider_cluster_admin" {
  count = var.create_eks_cluster ? 1 : 0

  cluster_name  = module.eks[0].cluster_name
  principal_arn = data.aws_iam_role.current_role[0].arn
  type          = "STANDARD"
}

# Associates the cluster-admin access policy with that role, cluster-wide.
resource "aws_eks_access_policy_association" "provider_cluster_admin" {
  count = var.create_eks_cluster ? 1 : 0

  # The access entry must exist before a policy can be associated with it.
  depends_on = [aws_eks_access_entry.provider_cluster_admin]

  cluster_name  = module.eks[0].cluster_name
  policy_arn    = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy"
  principal_arn = data.aws_iam_role.current_role[0].arn

  access_scope {
    type = "cluster"
  }
}
84+
85+
# IAM policy for the cluster autoscaler, used by the EKS module's fallback
# policy map. Created only when an EKS cluster is requested AND the caller did
# not supply their own additional policies.
resource "aws_iam_policy" "cluster_autoscaler" {
  count = var.create_eks_cluster && length(var.node_group_iam_role_additional_policies) == 0 ? 1 : 0

  # name_prefix (not name): AWS appends a unique suffix to the generated name.
  name_prefix = "${local.resource_prefix}-cluster-autoscaler${local.resource_suffix}"
  description = "EKS cluster-autoscaler policy for cluster ${local.eks_name}"

  policy = data.aws_iam_policy_document.cluster_autoscaler[0].json
}
92+
93+
# Policy document backing aws_iam_policy.cluster_autoscaler.
# Rendered only when var.create_eks_cluster is true.
data "aws_iam_policy_document" "cluster_autoscaler" {
  count = var.create_eks_cluster ? 1 : 0

  # Read-only discovery calls, allowed on every resource.
  statement {
    sid       = "clusterAutoscalerAll"
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "autoscaling:DescribeAutoScalingGroups",
      "autoscaling:DescribeAutoScalingInstances",
      "autoscaling:DescribeLaunchConfigurations",
      "autoscaling:DescribeTags",
      "ec2:DescribeLaunchTemplateVersions",
    ]
  }

  # Mutating calls. The resource is "*", but both conditions below must hold:
  # the target must be tagged as owned by this cluster and as enabled for the
  # cluster autoscaler.
  statement {
    sid       = "clusterAutoscalerOwn"
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "autoscaling:SetDesiredCapacity",
      "autoscaling:TerminateInstanceInAutoScalingGroup",
      "autoscaling:UpdateAutoScalingGroup",
    ]

    condition {
      test     = "StringEquals"
      variable = "autoscaling:ResourceTag/kubernetes.io/cluster/${local.eks_name}"
      values   = ["owned"]
    }

    condition {
      test     = "StringEquals"
      variable = "autoscaling:ResourceTag/k8s.io/cluster-autoscaler/enabled"
      values   = ["true"]
    }
  }
}
135+
136+
# Reads back the cluster created by module.eks; its endpoint and CA certificate
# are passed to the metaflow_helm module. Only evaluated when
# var.create_eks_cluster is true.
data "aws_eks_cluster" "cluster" {
  count = var.create_eks_cluster ? 1 : 0

  name = module.eks[0].cluster_name
}

# Authentication data for the same cluster; its token is passed to the
# metaflow_helm module. Only evaluated when var.create_eks_cluster is true.
data "aws_eks_cluster_auth" "cluster" {
  count = var.create_eks_cluster ? 1 : 0

  name = module.eks[0].cluster_name
}
145+
146+
# In-cluster services, deployed from the local ./modules/services sub-module.
# This module has no `count`: it is always instantiated. Every cluster-derived
# input falls back to an empty string when var.create_eks_cluster is false.
module "metaflow_helm" {
  source = "./modules/services"

  # Cluster connection details
  kubernetes_cluster_host           = var.create_eks_cluster ? data.aws_eks_cluster.cluster[0].endpoint : ""
  kubernetes_cluster_ca_certificate = var.create_eks_cluster ? data.aws_eks_cluster.cluster[0].certificate_authority[0].data : ""
  kubernetes_token                  = var.create_eks_cluster ? data.aws_eks_cluster_auth.cluster[0].token : ""

  # Cluster identity
  cluster_name          = var.create_eks_cluster ? module.eks[0].cluster_name : ""
  cluster_oidc_provider = var.create_eks_cluster ? module.eks[0].oidc_provider : ""
  account_id            = data.aws_caller_identity.current.account_id
  region                = data.aws_region.current.name

  # What to deploy
  resource_name_prefix      = local.resource_prefix
  deploy_metaflow_service   = var.deploy_metaflow_services_in_eks
  deploy_cluster_autoscaler = var.deploy_cluster_autoscaler
  metaflow_helm_values      = var.metaflow_helm_values

  metaflow_database = {
    database_name = local.database_name
    # Keep only the part of the endpoint before the first ":".
    host     = element(split(":", local.rds_master_instance_endpoint), 0)
    user     = local.database_username
    password = local.database_password
  }
}

examples/basic-aws-managed/main.tf

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
###############################################################################
2+
# An example using this module to set up a minimal deployment of Metaflow
3+
# with AWS Batch support, without the UI.
4+
###############################################################################
5+
6+
# Terraform and provider version requirements for this example.
terraform {
  required_version = ">= 1.10"

  # Each provider is declared with an explicit source address and version
  # constraint (the object form) rather than the legacy version-string-only
  # shorthand.
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.82"
    }
    random = {
      source  = "hashicorp/random"
      version = ">= 3.6"
    }
    # Used by the local_file resource in this example that writes the
    # generated Metaflow profile to disk.
    local = {
      source  = "hashicorp/local"
      version = ">= 2.0"
    }
  }
}
14+
15+
# AWS provider for this example.
# Make sure to set the region to the one you want to deploy to.
provider "aws" {
  region = "us-west-2"
}
18+
19+
20+
# Instantiates the root module of this repository (two directories up),
# asking it to create a VPC and tagging what it creates as Terraform-managed.
module "metaflow" {
  source = "../../"

  create_vpc = true
  tags       = { "managedBy" = "terraform" }
}
29+
30+
# Writes the module's `metaflow_aws_managed_profile_json` output to
# ./metaflow_profile.json.
resource "local_file" "metaflow_config" {
  filename = "./metaflow_profile.json"
  content  = module.metaflow.metaflow_aws_managed_profile_json
}

0 commit comments

Comments
 (0)