-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathservice_config_map.yaml
121 lines (110 loc) · 5.33 KB
/
service_config_map.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
{{- if .Values.config.values }}
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "modelEngine.fullname" . }}-service-config
namespace: {{ .Release.Namespace }}
labels:
{{- include "modelEngine.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": pre-install,pre-upgrade
"helm.sh/hook-weight": "-2"
data:
launch_service_config: |-
dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
# Config to know where model-engine is running
gateway_namespace: {{ .Release.Namespace | quote }}
# Config for scale-hosted Hosted Model Inference in the prod cluster, plus a bunch of other config-ish notes
# NOTE: If you add/change values inside this file that need to apply to all clusters, please make changes in
# all service_config_{env}.yaml files as well.
# Config for scale-hosted Hosted Model Inference in the prod cluster, see `service_config` for more details
model_primitive_host: model-server.{{ .Release.Namespace }}.svc.cluster.local
# # Endpoint config
# K8s namespace the endpoints will be created in
endpoint_namespace: {{ .Release.Namespace | quote }}
# Asynchronous endpoints
sqs_profile: {{ $.Values.serviceAccount.sqsProfileName }}
sqs_queue_policy_template: |-
{
"Version": "2012-10-17",
"Id": "__default_policy_ID",
"Statement": [
{
"Sid": "__owner_statement",
"Effect": "Allow",
"Principal": {
"AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:root"
},
"Action": "sqs:*",
"Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
},
{
"Effect": "Allow",
"Principal": {
"AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/{{ $.Values.serviceAccount.sqsProfileName }}"
},
"Action": "sqs:*",
"Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
},
{
"Effect": "Allow",
"Principal": {
"AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/ml_hosted_model_inference"
},
"Action": "sqs:*",
"Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
}
]
}
sqs_queue_tag_template: |-
{
"{{ .Values.tagging.organization }}/product": "{{ .Values.tagging.productTag }}",
"{{ .Values.tagging.organization }}/team": "${team}",
"{{ .Values.tagging.organization }}/contact": "{{ .Values.tagging.contactEmail }}",
"{{ .Values.tagging.organization }}/customer": "AllCustomers",
"{{ .Values.tagging.organization }}/financialOwner": "{{ .Values.tagging.contactEmail }}",
"Launch-Endpoint-Id": "${endpoint_id}",
"Launch-Endpoint-Name": "${endpoint_name}",
"Launch-Endpoint-Created-By": "${endpoint_created_by}"
}
# Billing
billing_queue_arn: arn:aws:events:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:event-bus/money
# The below redis URL would not work if we needed auth, which we do, so we have to pull cache_url from the cache_redis_aws_secret_name
cache_redis_aws_secret_name: "{{ .Values.secrets.redisAwsSecretName }}"
cloud_file_llm_fine_tune_repository: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/llm-ft-job-repository/prod"
dd_trace_enabled: true
istio_enabled: true
sensitive_log_mode: true
tgi_repository: "text-generation-inference"
vllm_repository: "vllm"
lightllm_repository: "lightllm"
tensorrt_llm_repository: "tensorrt-llm"
batch_inference_vllm_repository: "llm-engine/batch-infer-vllm"
user_inference_base_repository: "launch/inference"
user_inference_pytorch_repository: "hosted-model-inference/async-pytorch"
user_inference_tensorflow_repository: "hosted-model-inference/async-tensorflow-cpu"
docker_image_layer_cache_repository: "kaniko-cache"
# S3 access
hf_user_fine_tuned_weights_prefix: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/fine_tuned_weights"
infra_service_config: |-
env: {{ .Values.context | quote }}
cloud_provider: "aws"
env: "prod"
k8s_cluster_name: "{{ .Values.clusterName }}"
dns_host_domain: "model-engine.{{ $.Values.global.networking.internalDomain }}"
default_region: "{{ .Values.aws.region }}"
ml_account_id: "{{ .Values.aws.accountId }}"
docker_repo_prefix: "{{ .Values.aws.accountId }}.dkr.ecr.{{ .Values.aws.region }}.amazonaws.com"
redis_host: "{{ .Values.redis.hostname }}"
s3_bucket: "{{ .Values.aws.s3Bucket }}"
profile_ml_worker: "ml-worker"
profile_ml_inference_worker: "ml-worker"
identity_service_url: "{{ .Values.identityServiceUrl }}"
firehose_role_arn: "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/firehose-stream-logging-role"
firehose_stream_name: "{{ .Values.firehoseStreamName }}"
db_engine_pool_size: 20
db_engine_max_overflow: 10
db_engine_echo: false
db_engine_echo_pool: true
db_engine_disconnect_strategy: "pessimistic"
{{- end }}