# [RFC] Only build code locations with changes (#528)

# Workflow header: triggers, concurrency policy and the environment shared by
# every step of the deployment job.
name: Dagster Cloud Hybrid Deployment

on:
  # Pushes to the default branch drive the full deployment.
  push:
    branches:
      - 'main'
      - 'master'
  # Pull request activity drives branch deployments.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - closed

concurrency:
  # A newer run for the same ref cancels the deploy that is still in progress.
  group: ${{ github.ref }}
  cancel-in-progress: true

env:
  DAGSTER_CLOUD_ORGANIZATION: 'hooli'
  DAGSTER_CLOUD_API_TOKEN: ${{ secrets.DAGSTER_CLOUD_API_TOKEN }}
  DAGSTER_PROJECT_DIR: '.'
  DAGSTER_CLOUD_YAML_PATH: 'dagster_cloud.yaml'
  # Keep IMAGE_REGISTRY in sync with the `registry:` entry in dagster_cloud.yaml.
  IMAGE_REGISTRY: '764506304434.dkr.ecr.us-west-2.amazonaws.com/hooli-data-science-prod'
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# The workflow's only job: every step listed below runs in sequence on one
# ubuntu-22.04 runner.
# NOTE(review): nesting indentation appears to have been lost in this copy of
# the file; `dagster-cloud-deploy` belongs under `jobs`, and `runs-on`/`steps`
# under `dagster-cloud-deploy` - confirm against the real workflow file.
jobs:
dagster-cloud-deploy:
runs-on: ubuntu-22.04
steps:
# Gatekeeper step: the later steps compare `steps.prerun.outputs.result`
# against 'skip' to decide whether they should run at all.
- id: prerun
  name: Pre-run checks
  # NOTE(review): '[email protected]' looks like an e-mail-obfuscation artifact of
  # the original '<action>@<version>' ref - restore the real ref before use.
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
# Check out the pull request's head branch. `github.head_ref` is only set for
# pull_request events; when it is empty the action falls back to its default
# ref for the triggering event.
- name: Checkout
  if: steps.prerun.outputs.result != 'skip'
  uses: actions/checkout@v4
  with:
    ref: ${{ github.head_ref }}
# Install uv (used by the "Prepare dbt project" step to create the virtualenv
# and install the dbt/dagster tooling) with its download cache enabled.
# Guarded like its sibling steps: when the pre-run check says 'skip' the
# repository is not checked out and nothing later needs uv.
- name: Install the latest version of uv
  if: steps.prerun.outputs.result != 'skip'
  uses: astral-sh/setup-uv@v3
  with:
    enable-cache: true
# Run `ci check` against the project directory and dagster_cloud.yaml path
# configured in the workflow-level `env`.
- id: ci-validate
  name: Validate configuration
  if: steps.prerun.outputs.result != 'skip'
  # NOTE(review): '[email protected]' looks like an e-mail-obfuscation artifact of
  # the original '<action>@<version>' ref - restore the real ref before use.
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: >-
      ci check
      --project-dir ${{ env.DAGSTER_PROJECT_DIR }}
      --dagster-cloud-yaml-path ${{ env.DAGSTER_CLOUD_YAML_PATH }}
# Open the build session that the later `ci set-build-output` / `ci deploy`
# commands refer to. The target deployment is fixed to 'data-eng-prod' here.
- id: ci-init
  name: Initialize build session
  if: steps.prerun.outputs.result != 'skip'
  # NOTE(review): '[email protected]' looks like an e-mail-obfuscation artifact of
  # the original '<action>@<version>' ref - restore the real ref before use.
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    deployment: "data-eng-prod"
    project_dir: ${{ env.DAGSTER_PROJECT_DIR }}
    dagster_cloud_yaml_path: ${{ env.DAGSTER_CLOUD_YAML_PATH }}
# Derive a per-run image tag (<sha>-<run id>-<attempt>) and export it as
# IMAGE_TAG for all later steps.
- name: Generate docker image tag
  id: generate-image-tag
  if: steps.prerun.outputs.result != 'skip'
  run: |
    # Values appended to $GITHUB_ENV only become visible in *later* steps, so
    # keep the tag in a local variable to be able to log it from this step.
    IMAGE_TAG="$GITHUB_SHA-$GITHUB_RUN_ID-$GITHUB_RUN_ATTEMPT"
    echo "IMAGE_TAG=$IMAGE_TAG" >> "$GITHUB_ENV"
    echo "$IMAGE_TAG"
# Provide the Buildx builder used by the docker/build-push-action steps.
# Guarded like its sibling steps so a skipped run does no Docker setup.
- name: Set up Docker Buildx
  if: steps.prerun.outputs.result != 'skip'
  uses: docker/setup-buildx-action@v3
# Export AWS credentials for us-west-2, the region of the ECR registry named in
# IMAGE_REGISTRY.
- name: Configure AWS credentials
  if: steps.prerun.outputs.result != 'skip'
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: us-west-2
    aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
    aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# Log Docker in to ECR so the build steps can push images; the registry
# password is masked in the logs.
- name: Login to ECR
  uses: aws-actions/amazon-ecr-login@v2
  if: steps.prerun.outputs.result != 'skip'
  with:
    mask-password: "true"
# Push events only: point later steps at the production deployment by
# exporting DAGSTER_CLOUD_DEPLOYMENT_NAME.
- name: Set Prod Deployment Environment Variable for Push
  if: steps.prerun.outputs.result != 'skip' && github.event_name == 'push'
  run: |
    # Debug output. The previous version ended this line with a stray `"`,
    # which unbalanced the shell quoting and made the whole script fail.
    # NOTE(review): this step only runs on push events, where
    # github.event.pull_request is absent, so the value printed here is empty.
    echo "changed files: ${{ github.event.pull_request.changed_files }}"
    echo "DAGSTER_CLOUD_DEPLOYMENT_NAME=data-eng-prod" >> "$GITHUB_ENV"
# ---------------------------------------------------------------------------
# Change detection
#
# These two steps run BEFORE any build so that every build step below can be
# gated on their result. The previous gates used
# `contains(github.event.pull_request.changed_files, '<dir>/')`, but in the
# pull_request payload `changed_files` is the *number* of changed files, not
# their names, so no gate could ever match and nothing was built.
# ---------------------------------------------------------------------------
- name: Get changed files
  id: get-changed-files
  if: steps.prerun.outputs.result != 'skip'
  env:
    # Pull requests are compared with the PR base commit, pushes with the
    # previous tip of the branch.
    BASE_SHA: ${{ github.event.pull_request.base.sha || github.event.before }}
  run: |
    set -euo pipefail
    ZERO_SHA=0000000000000000000000000000000000000000
    # The checkout is shallow, so the base commit has to be fetched explicitly
    # before it can be diffed against the checked-out HEAD.
    if [ -n "$BASE_SHA" ] && [ "$BASE_SHA" != "$ZERO_SHA" ] \
      && git fetch --no-tags --depth=1 origin "$BASE_SHA"; then
      CHANGED_FILES="$(git diff --name-only "$BASE_SHA" HEAD)"
    else
      # No usable base commit: err on the safe side and report one path under
      # every code location directory so that everything gets rebuilt.
      echo "::warning::No usable base commit; treating all code locations as changed"
      CHANGED_FILES="$(printf '%s/\n' \
        dbt_project hooli_basics hooli_batch_enrichment hooli_data_eng \
        hooli-bi hooli-data-ingest hooli_snowflake_insights)"
    fi
    # The file list is multi-line, so it must be written to $GITHUB_ENV with
    # heredoc syntax; a plain NAME=value line would corrupt the env file.
    DELIMITER="CHANGED_FILES_${GITHUB_RUN_ID}_${RANDOM}${RANDOM}"
    {
      echo "CHANGED_FILES<<$DELIMITER"
      echo "$CHANGED_FILES"
      echo "$DELIMITER"
    } >> "$GITHUB_ENV"
- name: Extract changed directories
  id: extract-changed-dirs
  if: steps.prerun.outputs.result != 'skip'
  run: |
    set -euo pipefail
    # Reduce every changed path to its top-level directory. (`dirname` would
    # keep nested paths such as hooli_basics/assets, which the anchored filter
    # below can never match.)
    CHANGED_DIRS="$(printf '%s\n' "${CHANGED_FILES:-}" | cut -d/ -f1 | sort -u)"
    # Keep only directories that belong to a code location. `|| true` covers
    # the "nothing matched" case, where grep exits non-zero.
    FILTERED_DIRS="$(printf '%s\n' "$CHANGED_DIRS" \
      | grep -E '^(dbt_project|hooli_basics|hooli_batch_enrichment|hooli_data_eng|hooli-bi|hooli-data-ingest|hooli_snowflake_insights)$' \
      | tr '\n' ' ' || true)"
    echo "$FILTERED_DIRS"
    echo "CHANGED_DIRS=$FILTERED_DIRS" >> "$GITHUB_ENV"

    # Map directories to code location names, adding each location only once
    # (hooli_data_eng and dbt_project both map to data-eng-pipeline).
    LOCATIONS=""
    add_location() {
      case " $LOCATIONS " in
        *" $1 "*) ;;
        *) LOCATIONS="$LOCATIONS $1" ;;
      esac
    }
    for DIR in $FILTERED_DIRS; do
      case "$DIR" in
        hooli_data_eng|dbt_project) add_location data-eng-pipeline ;;
        hooli_basics) add_location basics ;;
        hooli_batch_enrichment) add_location batch_enrichment ;;
        hooli_snowflake_insights) add_location snowflake_insights ;;
        # These directories are spelled with hyphens, the locations with
        # underscores.
        hooli-data-ingest) add_location hooli_data_ingest ;;
        hooli-bi) add_location hooli_bi ;;
      esac
    done
    LOCATIONS="${LOCATIONS# }"
    echo "LOCATIONS=$LOCATIONS" >> "$GITHUB_ENV"

    # One boolean step output per changed location ('-' becomes '_'), used by
    # the `if:` gates of the build steps below.
    for LOCATION in $LOCATIONS; do
      echo "${LOCATION//-/_}=true" >> "$GITHUB_OUTPUT"
    done
# NOTE(review): the data-eng-pipeline image is built from the repository root
# (`context: .`), but only changes under hooli_data_eng/ or dbt_project/
# trigger a rebuild - confirm that root-level files need not be watched.
- name: Prepare dbt project
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.data_eng_pipeline == 'true'
  run: |
    uv venv
    source .venv/bin/activate
    uv pip install dagster-dbt dagster-cloud dbt-core dbt-duckdb dbt-snowflake --upgrade;
    dagster-dbt project prepare-and-package --file hooli_data_eng/project.py
    dagster-cloud ci dagster-dbt project manage-state --file hooli_data_eng/project.py --source-deployment data-eng-prod
# Build 'data-eng-pipeline' code location
- name: Build and upload Docker image for data-eng-pipeline
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.data_eng_pipeline == 'true'
  uses: docker/build-push-action@v5
  with:
    context: .
    push: true
    tags: ${{ env.IMAGE_REGISTRY }}:${{ env.IMAGE_TAG }}-data-eng-pipeline
    # cache-from: type=gha,scope=buildx
    # cache-to: type=gha,mode=max,scope=buildx
# NOTE(review): '[email protected]' in the `uses:` refs of this block looks like an
# e-mail-obfuscation artifact of the original '<action>@<version>' ref -
# restore the real ref before use.
- name: Update build session with image tag for data-eng-pipeline
  id: ci-set-build-output-data-eng-pipeline
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.data_eng_pipeline == 'true'
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: "ci set-build-output --location-name=data-eng-pipeline --image-tag=$IMAGE_TAG-data-eng-pipeline"
# Build 'basics' code location
- name: Build and upload Docker image for basics
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.basics == 'true'
  uses: docker/build-push-action@v5
  with:
    context: ./hooli_basics
    push: true
    tags: ${{ env.IMAGE_REGISTRY }}:${{ env.IMAGE_TAG }}-basics
- name: Update build session with image tag for basics
  id: ci-set-build-output-basics
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.basics == 'true'
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: "ci set-build-output --location-name=basics --image-tag=$IMAGE_TAG-basics"
# Build 'batch enrichment' code location
- name: Build and upload Docker image for batch enrichment
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.batch_enrichment == 'true'
  uses: docker/build-push-action@v5
  with:
    context: ./hooli_batch_enrichment
    push: true
    tags: ${{ env.IMAGE_REGISTRY }}:${{ env.IMAGE_TAG }}-batch-enrichment
- name: Update build session with image tag for batch enrichment
  id: ci-set-build-output-batch-enrichment
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.batch_enrichment == 'true'
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: "ci set-build-output --location-name=batch_enrichment --image-tag=$IMAGE_TAG-batch-enrichment"
# Build 'snowflake_insights' code location
- name: Build and upload Docker image for snowflake insights
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.snowflake_insights == 'true'
  uses: docker/build-push-action@v5
  with:
    context: ./hooli_snowflake_insights
    push: true
    tags: ${{ env.IMAGE_REGISTRY }}:${{ env.IMAGE_TAG }}-snowflake-insights
- name: Update build session with image tag for snowflake insights
  id: ci-set-build-output-snowflake-insights
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.snowflake_insights == 'true'
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: "ci set-build-output --location-name=snowflake_insights --image-tag=$IMAGE_TAG-snowflake-insights"
# Build 'hooli_data_ingest' code location
- name: Build and upload Docker image for hooli_data_ingest
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.hooli_data_ingest == 'true'
  uses: docker/build-push-action@v5
  with:
    context: ./hooli-data-ingest
    push: true
    tags: ${{ env.IMAGE_REGISTRY }}:${{ env.IMAGE_TAG }}-hooli-data-ingest
- name: Update build session with image tag for hooli_data_ingest
  id: ci-set-build-output-hooli-data-ingest
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.hooli_data_ingest == 'true'
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: "ci set-build-output --location-name=hooli_data_ingest --image-tag=$IMAGE_TAG-hooli-data-ingest"
# Build 'hooli_bi' code location
- name: Build and upload Docker image for hooli_bi
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.hooli_bi == 'true'
  uses: docker/build-push-action@v5
  with:
    context: ./hooli-bi
    push: true
    tags: ${{ env.IMAGE_REGISTRY }}:${{ env.IMAGE_TAG }}-hooli-bi
- name: Update build session with image tag for hooli_bi
  id: ci-set-build-output-hooli-bi
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.hooli_bi == 'true'
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: "ci set-build-output --location-name=hooli_bi --image-tag=$IMAGE_TAG-hooli-bi"
# Build pipes example container (lives under hooli_data_eng/, so it follows the
# data-eng-pipeline gate; it is pushed under a fixed `latest-pipes-example` tag)
- name: Build and upload Docker image for pipes example
  if: steps.prerun.outputs.result != 'skip' && steps.extract-changed-dirs.outputs.data_eng_pipeline == 'true'
  uses: docker/build-push-action@v5
  with:
    context: ./hooli_data_eng/utils/example_container
    push: true
    tags: ${{ env.IMAGE_REGISTRY }}:latest-pipes-example
# Deploy
# Deploys only the code locations listed in $LOCATIONS (space-separated names
# exported by the change-detection step).
#  - `--location-name` takes a single value per occurrence, so the flag is
#    repeated once per location instead of being followed by the whole list.
#  - When no location changed, $LOCATIONS is empty and the step is skipped;
#    previously this produced a dangling `--location-name` with no value.
# NOTE(review): relies on the action evaluating `command` in a shell, as the
# other steps already do for `$IMAGE_TAG` and `>> $GITHUB_STEP_SUMMARY`.
# NOTE(review): '[email protected]' looks like an e-mail-obfuscation artifact of
# the original '<action>@<version>' ref - restore the real ref before use.
- name: Deploy to Dagster Cloud
  id: ci-deploy
  if: steps.prerun.outputs.result != 'skip' && env.LOCATIONS != ''
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: >-
      ci deploy
      $(for loc in $LOCATIONS; do printf ' --location-name %s' "$loc"; done)
# Alternative kept for reference: call the CLI directly from a run step.
# - name: Deploy to Dagster Cloud
# id: ci-deploy
# if: steps.prerun.outputs.result != 'skip'
# run: |
# dagster-cloud ci deploy --location-name $LOCATIONS
# Pull requests only: look up the branch deployment so that its name can be fed
# to the dbt slim CI trigger as `steps.get-branch-deployment.outputs.deployment`.
- id: get-branch-deployment
  name: Get branch deployment
  if: steps.prerun.outputs.result != 'skip' && github.event_name == 'pull_request'
  # NOTE(review): '[email protected]' looks like an e-mail-obfuscation artifact of
  # the original '<action>@<version>' ref - restore the real ref before use.
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    organization_id: "hooli"
# Pull requests only: launch `dbt_slim_ci_job` from the data-eng-pipeline code
# location inside the branch deployment resolved by the previous step.
- name: Trigger dbt slim CI
  if: steps.prerun.outputs.result != 'skip' && github.event_name == 'pull_request'
  # NOTE(review): '[email protected]' looks like an e-mail-obfuscation artifact of
  # the original '<action>@<version>' ref - restore the real ref before use.
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    organization_id: hooli
    deployment: ${{ steps.get-branch-deployment.outputs.deployment }}
    location_name: data-eng-pipeline
    job_name: dbt_slim_ci_job
# Reporting: thanks to always(), this step still runs when an earlier step
# failed, unless the pre-run check asked to skip the whole job.
- id: ci-notify
  name: Update PR comment for branch deployments
  if: always() && steps.prerun.outputs.result != 'skip'
  # NOTE(review): '[email protected]' looks like an e-mail-obfuscation artifact of
  # the original '<action>@<version>' ref - restore the real ref before use.
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: 'ci notify --project-dir=${{ env.DAGSTER_PROJECT_DIR }}'
# Append the build session status, rendered as markdown, to the job summary.
# Runs even after a failure (always()) unless the pre-run check said 'skip'.
- id: ci-summary
  name: Generate summary
  if: always() && steps.prerun.outputs.result != 'skip'
  # NOTE(review): '[email protected]' looks like an e-mail-obfuscation artifact of
  # the original '<action>@<version>' ref - restore the real ref before use.
  uses: dagster-io/dagster-cloud-action/actions/utils/[email protected]
  with:
    command: 'ci status --output-format=markdown >> $GITHUB_STEP_SUMMARY'