charts/model-engine/templates/service_config_map.yaml

{{- if .Values.config.values }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "modelEngine.fullname" . }}-service-config
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "modelEngine.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade
    "helm.sh/hook-weight": "-2"
data:
  launch_service_config: |-
    dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}

    # Config to know where model-engine is running
    gateway_namespace: {{ .Release.Namespace | quote }}

    # Config for scale-hosted Hosted Model Inference in the prod cluster, plus a bunch of other config-ish notes
    # NOTE: If you add/change values inside this file that need to apply to all clusters, please make changes in
    # all service_config_{env}.yaml files as well.

    # Config for scale-hosted Hosted Model Inference in the prod cluster, see `service_config` for more details
    model_primitive_host: model-server.{{ .Release.Namespace }}.svc.cluster.local

    # # Endpoint config
    # K8s namespace the endpoints will be created in
    endpoint_namespace: {{ .Release.Namespace | quote }}

    # Asynchronous endpoints
    sqs_profile: {{ $.Values.serviceAccount.sqsProfileName }}
    sqs_queue_policy_template: |-
      {
          "Version": "2012-10-17",
          "Id": "__default_policy_ID",
          "Statement": [
          {
              "Sid": "__owner_statement",
              "Effect": "Allow",
              "Principal": {
              "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:root"
              },
              "Action": "sqs:*",
              "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
          },
          {
              "Effect": "Allow",
              "Principal": {
              "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/{{ $.Values.serviceAccount.sqsProfileName }}"
              },
              "Action": "sqs:*",
              "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
          },
          {
              "Effect": "Allow",
              "Principal": {
              "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/ml_hosted_model_inference"
              },
              "Action": "sqs:*",
              "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
          }
          ]
      }

    sqs_queue_tag_template: |-
      {
          "{{ .Values.tagging.organization }}/product": "{{ .Values.tagging.productTag }}",
          "{{ .Values.tagging.organization }}/team": "${team}",
          "{{ .Values.tagging.organization }}/contact": "{{ .Values.tagging.contactEmail }}",
          "{{ .Values.tagging.organization }}/customer": "AllCustomers",
          "{{ .Values.tagging.organization }}/financialOwner": "{{ .Values.tagging.contactEmail }}",
          "Launch-Endpoint-Id": "${endpoint_id}",
          "Launch-Endpoint-Name": "${endpoint_name}",
          "Launch-Endpoint-Created-By": "${endpoint_created_by}"
      }

    # Billing
    billing_queue_arn: arn:aws:events:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:event-bus/money
    
    # The below redis URL would not work if we needed auth, which we do, so we have to pull cache_url from the cache_redis_aws_secret_name
    cache_redis_aws_secret_name: "{{ .Values.secrets.redisAwsSecretName }}"

    cloud_file_llm_fine_tune_repository: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/llm-ft-job-repository/prod"

    dd_trace_enabled: true
    istio_enabled: true
    sensitive_log_mode: true
    tgi_repository: "text-generation-inference"
    vllm_repository: "vllm"
    lightllm_repository: "lightllm"
    tensorrt_llm_repository: "tensorrt-llm"
    batch_inference_vllm_repository: "llm-engine/batch-infer-vllm"
    user_inference_base_repository: "launch/inference"
    user_inference_pytorch_repository: "hosted-model-inference/async-pytorch"
    user_inference_tensorflow_repository: "hosted-model-inference/async-tensorflow-cpu"
    docker_image_layer_cache_repository: "kaniko-cache"

    # S3 access
    hf_user_fine_tuned_weights_prefix: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/fine_tuned_weights"
  infra_service_config: |-
    env: {{ .Values.context | quote }}
    cloud_provider: "aws"
    env: "prod"
    k8s_cluster_name: "{{ .Values.clusterName }}"
    dns_host_domain: "model-engine.{{ $.Values.global.networking.internalDomain }}"
    default_region: "{{ .Values.aws.region }}"
    ml_account_id: "{{ .Values.aws.accountId }}"
    docker_repo_prefix: "{{ .Values.aws.accountId }}.dkr.ecr.{{ .Values.aws.region }}.amazonaws.com"
    redis_host: "{{ .Values.redis.hostname }}"
    s3_bucket: "{{ .Values.aws.s3Bucket }}"
    profile_ml_worker: "ml-worker"
    profile_ml_inference_worker: "ml-worker"
    identity_service_url: "{{ .Values.identityServiceUrl }}"
    firehose_role_arn: "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/firehose-stream-logging-role"
    firehose_stream_name: "{{ .Values.firehoseStreamName }}"
    db_engine_pool_size: 20
    db_engine_max_overflow: 10
    db_engine_echo: false
    db_engine_echo_pool: true
    db_engine_disconnect_strategy: "pessimistic"
{{- end }}