Skip to content

Commit

Permalink
Use sidecar containers for Cloud SQL
Browse files Browse the repository at this point in the history
Run the Cloud SQL Auth Proxy as a proper sidecar container using
the new feature in Kubernetes 1.29. This avoids having to do
coordination between the main container and the Cloud SQL Auth Proxy
container to shut down the latter for jobs, and also ensures the
correct startup order and shutdown semantics.
  • Loading branch information
rra committed Feb 15, 2025
1 parent 9aa55bd commit a121e71
Show file tree
Hide file tree
Showing 20 changed files with 210 additions and 388 deletions.
11 changes: 5 additions & 6 deletions applications/gafaelfawr/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,17 @@ Authentication and identity system
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | Affinity rules for the Gafaelfawr frontend pod |
| cloudsql.affinity | object | `{}` | Affinity rules for the Cloud SQL Proxy pod |
| cloudsql.affinity | object | `{}` | Affinity rules for the standalone Cloud SQL Proxy pod |
| cloudsql.enabled | bool | `false` | Enable the Cloud SQL Auth Proxy, used with Cloud SQL databases on Google Cloud. This will be run as a sidecar for the main Gafaelfawr pods, and as a separate service (behind a `NetworkPolicy`) for other, lower-traffic services. |
| cloudsql.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for Cloud SQL Auth Proxy images |
| cloudsql.image.repository | string | `"gcr.io/cloudsql-docker/gce-proxy"` | Cloud SQL Auth Proxy image to use |
| cloudsql.image.schemaUpdateTagSuffix | string | `"-alpine"` | Tag suffix to use for the proxy for schema updates |
| cloudsql.image.tag | string | `"1.37.4"` | Cloud SQL Auth Proxy tag to use |
| cloudsql.instanceConnectionName | string | None, must be set if Cloud SQL Auth Proxy is enabled | Instance connection name for a Cloud SQL PostgreSQL instance |
| cloudsql.nodeSelector | object | `{}` | Node selection rules for the Cloud SQL Proxy pod |
| cloudsql.podAnnotations | object | `{}` | Annotations for the Cloud SQL Proxy pod |
| cloudsql.resources | object | See `values.yaml` | Resource limits and requests for the Cloud SQL Proxy pod |
| cloudsql.nodeSelector | object | `{}` | Node selection rules for the standalone Cloud SQL Proxy pod |
| cloudsql.podAnnotations | object | `{}` | Annotations for the standalone Cloud SQL Proxy pod |
| cloudsql.resources | object | See `values.yaml` | Resource limits and requests for the Cloud SQL Proxy container |
| cloudsql.serviceAccount | string | None, must be set if Cloud SQL Auth Proxy is enabled | The Google service account that has an IAM binding to the `gafaelfawr` Kubernetes service account and has the `cloudsql.client` role |
| cloudsql.tolerations | list | `[]` | Tolerations for the Cloud SQL Proxy pod |
| cloudsql.tolerations | list | `[]` | Tolerations for the standalone Cloud SQL Proxy pod |
| config.afterLogoutUrl | string | Top-level page of this Phalanx environment | Where to send the user after they log out |
| config.baseInternalUrl | string | FQDN under `svc.cluster.local` | URL for direct connections to the Gafaelfawr service, bypassing the Ingress. Must use a service name of `gafaelfawr` and port 8080. |
| config.cilogon.clientId | string | `nil` | CILogon client ID. One and only one of this, `config.github.clientId`, or `config.oidc.clientId` must be set. |
Expand Down
29 changes: 29 additions & 0 deletions applications/gafaelfawr/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,35 @@ app.kubernetes.io/name: "gafaelfawr"
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Cloud SQL Auth Proxy sidecar container

Emits a single container list entry named "cloud-sql-proxy" that runs
/cloud_sql_proxy with the PRIVATE IP address type, debug logging to stdout,
and structured logs, mapping the configured instance to tcp:5432.

The entry sets restartPolicy "Always", so it is meant to be listed under a
pod's initContainers, where Kubernetes 1.29 and later treats it as a sidecar
rather than as a run-to-completion init container.

Must be included with a context that provides .Values (normally the chart
root), because it reads .Values.cloudsql.image.repository,
.Values.cloudsql.image.tag, .Values.cloudsql.image.pullPolicy,
.Values.cloudsql.instanceConnectionName, and .Values.cloudsql.resources.

Rendering fails with an explicit message when
cloudsql.instanceConnectionName is not set. The resources key is emitted
only when cloudsql.resources is non-empty.

The container forbids privilege escalation, drops all capabilities, uses a
read-only root file system, and runs as non-root UID and GID 65532.

NOTE(review): the resources are indented with nindent 12 inside this helper
and callers indent the whole result again; confirm that the resulting depth
under the resources key is what is intended.
*/}}
{{- define "gafaelfawr.cloudsqlSidecar" -}}
- name: "cloud-sql-proxy"
command:
- "/cloud_sql_proxy"
- "-ip_address_types=PRIVATE"
- "-log_debug_stdout=true"
- "-structured_logs=true"
- "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432"
image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}"
imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }}
{{- with .Values.cloudsql.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
restartPolicy: "Always"
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "all"
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 65532
runAsGroup: 65532
{{- end }}

{{/*
Common environment variables
*/}}
Expand Down
28 changes: 4 additions & 24 deletions applications/gafaelfawr/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,6 @@ spec:
automountServiceAccountToken: false
{{- end }}
containers:
{{- if .Values.cloudsql.enabled }}
- name: "cloud-sql-proxy"
command:
- "/cloud_sql_proxy"
- "-ip_address_types=PRIVATE"
- "-log_debug_stdout=true"
- "-structured_logs=true"
- "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432"
image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}"
imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }}
{{- with .Values.cloudsql.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "all"
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 65532
runAsGroup: 65532
{{- end }}
- name: "gafaelfawr"
env:
{{- include "gafaelfawr.envVars" (dict "Chart" .Chart "Release" .Release "Values" .Values "sidecar" true) | nindent 12 }}
Expand Down Expand Up @@ -129,6 +105,10 @@ spec:
- name: "tmp"
mountPath: "/tmp"
{{- end }}
{{- if .Values.cloudsql.enabled }}
{{- /*
Run the Cloud SQL Auth Proxy as a sidecar, i.e. an init container whose
restartPolicy is "Always". The helper reads .Values, so the root context
must be passed as the second argument to include; include does not accept
a template name on its own.
*/}}
      initContainers:
        {{- include "gafaelfawr.cloudsqlSidecar" . | nindent 8 }}
{{- end }}
securityContext:
runAsNonRoot: true
runAsUser: 1000
Expand Down
58 changes: 6 additions & 52 deletions applications/gafaelfawr/templates/job-schema-update.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,56 +29,10 @@ spec:
automountServiceAccountToken: false
{{- end }}
containers:
{{- if .Values.cloudsql.enabled }}
- name: "cloud-sql-proxy"
# Running the sidecar as normal causes it to keep running and thus
# the Pod never exits, the Job never finishes, and the hook blocks
# the sync. Have the main pod signal the sidecar by writing to a
# file on a shared emptyDir file system, and use a simple watcher
# loop in shell in the sidecar container to terminate the proxy when
# the main container finishes.
#
# Based on https://stackoverflow.com/questions/41679364/
command:
- "/bin/sh"
- "-c"
args:
- |
/cloud_sql_proxy -ip_address_types=PRIVATE -log_debug_stdout=true -structured_logs=true -instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432 &
PID=$!
while true; do
if [[ -f "/lifecycle/main-terminated" ]]; then
kill $PID
exit 0
fi
sleep 1
done
image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}{{ .Values.cloudsql.image.schemaUpdateTagSuffix }}"
imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }}
{{- with .Values.cloudsql.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "all"
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 65532
runAsGroup: 65532
volumeMounts:
- name: "lifecycle"
mountPath: "/lifecycle"
{{- end }}
- name: "gafaelfawr"
command:
- "/bin/sh"
- "-c"
- |
gafaelfawr update-schema
touch /lifecycle/main-terminated
- "gafaelfawr"
- "update-schema"
env:
{{- include "gafaelfawr.envVars" (dict "Chart" .Chart "Release" .Release "Values" .Values "sidecar" true) | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
Expand All @@ -97,8 +51,10 @@ spec:
- name: "config"
mountPath: "/etc/gafaelfawr"
readOnly: true
- name: "lifecycle"
mountPath: "/lifecycle"
{{- if .Values.cloudsql.enabled }}
{{- /*
Run the Cloud SQL Auth Proxy as a sidecar (an init container whose
restartPolicy is "Always") next to the schema update container, replacing
the earlier shell watcher loop and shared lifecycle volume. The helper reads
.Values, so the root context must be passed as the second argument to
include; include does not accept a template name on its own.
*/}}
      initContainers:
        {{- include "gafaelfawr.cloudsqlSidecar" . | nindent 8 }}
{{- end }}
restartPolicy: "Never"
securityContext:
runAsNonRoot: true
Expand All @@ -108,8 +64,6 @@ spec:
- name: "config"
configMap:
name: "gafaelfawr"
- name: "lifecycle"
emptyDir: {}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
19 changes: 8 additions & 11 deletions applications/gafaelfawr/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -318,9 +318,6 @@ cloudsql:
# -- Cloud SQL Auth Proxy tag to use
tag: "1.37.4"

# -- Tag suffix to use for the proxy for schema updates
schemaUpdateTagSuffix: "-alpine"

# -- Pull policy for Cloud SQL Auth Proxy images
pullPolicy: "IfNotPresent"

Expand All @@ -333,7 +330,7 @@ cloudsql:
# @default -- None, must be set if Cloud SQL Auth Proxy is enabled
serviceAccount: ""

# -- Resource limits and requests for the Cloud SQL Proxy pod
# -- Resource limits and requests for the Cloud SQL Proxy container
# @default -- See `values.yaml`
resources:
limits:
Expand All @@ -343,17 +340,17 @@ cloudsql:
cpu: "5m"
memory: "7Mi"

# -- Annotations for the Cloud SQL Proxy pod
podAnnotations: {}
# -- Affinity rules for the standalone Cloud SQL Proxy pod
affinity: {}

# -- Node selection rules for the Cloud SQL Proxy pod
# -- Node selection rules for the standalone Cloud SQL Proxy pod
nodeSelector: {}

# -- Tolerations for the Cloud SQL Proxy pod
tolerations: []
# -- Annotations for the standalone Cloud SQL Proxy pod
podAnnotations: {}

# -- Affinity rules for the Cloud SQL Proxy pod
affinity: {}
# -- Tolerations for the standalone Cloud SQL Proxy pod
tolerations: []

maintenance:
# -- Cron schedule string for Gafaelfawr data consistency audit (in UTC)
Expand Down
3 changes: 1 addition & 2 deletions applications/times-square/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ An API service for managing and rendering parameterized Jupyter notebooks.
| cloudsql.enabled | bool | `false` | Enable the Cloud SQL Auth Proxy sidecar, used with Cloud SQL databases on Google Cloud |
| cloudsql.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for Cloud SQL Auth Proxy images |
| cloudsql.image.repository | string | `"gcr.io/cloudsql-docker/gce-proxy"` | Cloud SQL Auth Proxy image to use |
| cloudsql.image.resources | object | see `values.yaml` | Resource requests and limits for Cloud SQL pod |
| cloudsql.image.schemaUpdateTagSuffix | string | `"-alpine"` | Tag suffix to use for the proxy for schema updates |
| cloudsql.image.resources | object | See `values.yaml` | Resource requests and limits for Cloud SQL pod |
| cloudsql.image.tag | string | `"1.37.4"` | Cloud SQL Auth Proxy tag to use |
| cloudsql.instanceConnectionName | string | `""` | Instance connection name for a Cloud SQL PostgreSQL instance |
| cloudsql.serviceAccount | string | `""` | The Google service account that has an IAM binding to the `times-square` Kubernetes service accounts and has the `cloudsql.client` role |
Expand Down
28 changes: 28 additions & 0 deletions applications/times-square/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,34 @@ app.kubernetes.io/name: {{ include "times-square.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Cloud SQL Auth Proxy sidecar container

Emits a single container list entry named "cloud-sql-proxy" that runs
/cloud_sql_proxy with the PRIVATE IP address type, debug logging to stdout,
and structured logs, mapping the configured instance to tcp:5432.

The entry sets restartPolicy "Always", so it is meant to be listed under a
pod's initContainers, where Kubernetes 1.29 and later treats it as a sidecar
rather than as a run-to-completion init container.

Must be included with a context that provides .Values (normally the chart
root), because it reads .Values.cloudsql.image.repository,
.Values.cloudsql.image.tag, .Values.cloudsql.image.pullPolicy,
.Values.cloudsql.instanceConnectionName, and .Values.cloudsql.resources.

Rendering fails with an explicit message when
cloudsql.instanceConnectionName is not set. The resources key is emitted
only when cloudsql.resources is non-empty.

The container forbids privilege escalation, drops all capabilities, uses a
read-only root file system, and runs as non-root UID and GID 65532.

NOTE(review): the times-square README documents the resources setting as
cloudsql.image.resources, while this helper reads cloudsql.resources. If
values.yaml nests resources under image, the with block below never matches
and no resources are rendered; confirm which key is intended.

NOTE(review): the resources are indented with nindent 12 inside this helper
and callers indent the whole result again; confirm that the resulting depth
under the resources key is what is intended.
*/}}
{{- define "times-square.cloudsqlSidecar" -}}
- name: "cloud-sql-proxy"
command:
- "/cloud_sql_proxy"
- "-ip_address_types=PRIVATE"
- "-log_debug_stdout=true"
- "-structured_logs=true"
- "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432"
image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}"
imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }}
{{- with .Values.cloudsql.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
restartPolicy: "Always"
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "all"
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 65532
runAsGroup: 65532
{{- end }}

{{/*
Create the name of the service account to use
Expand Down
24 changes: 4 additions & 20 deletions applications/times-square/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,6 @@ spec:
runAsUser: 1000
runAsGroup: 1000
containers:
{{- if .Values.cloudsql.enabled }}
- name: "cloud-sql-proxy"
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "all"
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 65532
runAsGroup: 65532
image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}"
imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }}
command:
- "/cloud_sql_proxy"
- "-ip_address_types=PRIVATE"
- "-log_debug_stdout=true"
- "-structured_logs=true"
- "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432"
{{- end }}
- name: {{ .Chart.Name }}
securityContext:
allowPrivilegeEscalation: false
Expand Down Expand Up @@ -122,6 +102,10 @@ spec:
secretKeyRef:
name: {{ template "times-square.fullname" . }}-secret
key: "TS_SENTRY_DSN"
{{- if .Values.cloudsql.enabled }}
{{- /*
Run the Cloud SQL Auth Proxy as a sidecar, i.e. an init container whose
restartPolicy is "Always". The helper reads .Values, so the root context
must be passed as the second argument to include; include does not accept
a template name on its own.
*/}}
      initContainers:
        {{- include "times-square.cloudsqlSidecar" . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
60 changes: 6 additions & 54 deletions applications/times-square/templates/job-schema-update.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,56 +28,10 @@ spec:
automountServiceAccountToken: false
{{- end }}
containers:
{{- if .Values.cloudsql.enabled }}
- name: "cloud-sql-proxy"
# Running the sidecar as normal causes it to keep running and thus
# the Pod never exits, the Job never finishes, and the hook blocks
# the sync. Have the main pod signal the sidecar by writing to a
# file on a shared emptyDir file system, and use a simple watcher
# loop in shell in the sidecar container to terminate the proxy when
# the main container finishes.
#
# Based on https://stackoverflow.com/questions/41679364/
command:
- "/bin/sh"
- "-c"
args:
- |
/cloud_sql_proxy -ip_address_types=PRIVATE -log_debug_stdout=true -structured_logs=true -instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432 &
PID=$!
while true; do
if [[ -f "/lifecycle/main-terminated" ]]; then
kill $PID
exit 0
fi
sleep 1
done
image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}{{ .Values.cloudsql.image.schemaUpdateTagSuffix }}"
imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }}
{{- with .Values.cloudsql.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "all"
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 65532
runAsGroup: 65532
volumeMounts:
- name: "lifecycle"
mountPath: "/lifecycle"
{{- end }}
- name: "times-square"
command:
- "/bin/sh"
- "-c"
- |
times-square update-db-schema
touch /lifecycle/main-terminated
- "times-square"
- "update-db-schema"
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
{{- with .Values.resources }}
Expand Down Expand Up @@ -124,17 +78,15 @@ spec:
secretKeyRef:
name: {{ template "times-square.fullname" . }}-secret
key: "TS_SLACK_WEBHOOK_URL"
volumeMounts:
- name: "lifecycle"
mountPath: "/lifecycle"
{{- if .Values.cloudsql.enabled }}
{{- /*
Run the Cloud SQL Auth Proxy as a sidecar (an init container whose
restartPolicy is "Always") next to the schema update container, replacing
the earlier shell watcher loop and shared lifecycle volume. The helper reads
.Values, so the root context must be passed as the second argument to
include; include does not accept a template name on its own.
*/}}
      initContainers:
        {{- include "times-square.cloudsqlSidecar" . | nindent 8 }}
{{- end }}
restartPolicy: "Never"
securityContext:
runAsNonRoot: true
runAsUser: 1000
runAsGroup: 1000
volumes:
- name: "lifecycle"
emptyDir: {}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
Loading

0 comments on commit a121e71

Please sign in to comment.