Skip to content

materialize-s3-iceberg: don't use context manager for transaction #6248

materialize-s3-iceberg: don't use context manager for transaction

materialize-s3-iceberg: don't use context manager for transaction #6248

Workflow file for this run

# Workflow identity, run de-duplication and triggers.
name: CI

# Runs are grouped by workflow name + git ref; starting a new run in a group
# cancels the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Triggered by pushes to main and by pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
# Builds base-image/Dockerfile, loads it locally as `:local`, and pushes it to
# ghcr.io tagged with the short commit SHA. On push events it is additionally
# pushed with the `dev` tag and the version read from base-image/VERSION.
build_base_image:
  runs-on: ubuntu-24.04
  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0

    # Exposes two step outputs used below:
    #   tag     - first 7 characters of the commit SHA
    #   version - contents of base-image/VERSION with newlines removed
    # Outputs are appended to $GITHUB_OUTPUT; the `::set-output` workflow
    # command used previously is deprecated.
    - name: Prepare
      id: prep
      run: |
        TAG=$(echo $GITHUB_SHA | head -c7)
        echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
        VERSION=$(cat base-image/VERSION | tr -d '\n')
        echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

    - name: Login to GitHub package docker registry
      run: |
        echo "${{ secrets.GITHUB_TOKEN }}" | \
          docker login --username ${{ github.actor }} --password-stdin ghcr.io

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v1
      with:
        driver-opts: |
          network=host

    # `load: true` makes the built image available to the local docker daemon.
    - name: Build base-image Docker Image
      uses: docker/build-push-action@v2
      with:
        context: .
        file: base-image/Dockerfile
        load: true
        tags: ghcr.io/estuary/base-image:local

    # Runs for every event, so pull requests also publish a SHA-tagged image.
    - name: Push base-image image
      uses: docker/build-push-action@v2
      with:
        context: .
        file: base-image/Dockerfile
        push: true
        tags: ghcr.io/estuary/base-image:${{ steps.prep.outputs.tag }}

    - name: Push base-image image with 'dev' tag
      if: ${{ github.event_name == 'push' }}
      uses: docker/build-push-action@v2
      with:
        context: .
        file: base-image/Dockerfile
        push: true # See 'if' above
        tags: ghcr.io/estuary/base-image:dev,ghcr.io/estuary/base-image:${{ steps.prep.outputs.version }}
# Builds, tests and publishes one connector image per matrix entry. Runs after
# build_base_image; `fail-fast: false` lets the remaining matrix entries keep
# running when one connector fails.
build_connectors:
  runs-on: ubuntu-24.04
  needs: build_base_image
  strategy:
    fail-fast: false
    matrix:
      connector:
        - source-alpaca
        - source-bigquery-batch
        - source-dropbox
        - source-dynamodb
        - source-firestore
        - source-gcs
        - source-google-drive
        - source-google-pubsub
        - source-hello-world
        - source-http-file
        - source-http-ingest
        - source-kafka
        - source-kinesis
        - source-mongodb
        - source-mysql
        - source-mysql-batch
        - source-oracle
        - source-postgres
        - source-postgres-batch
        - source-redshift-batch
        - source-s3
        - source-sftp
        - source-snowflake
        - source-sqlserver
        - source-test
        - source-azure-blob-storage
        - materialize-bigquery
        - materialize-databricks
        - materialize-dynamodb
        - materialize-elasticsearch
        - materialize-firebolt
        - materialize-gcs-csv
        - materialize-gcs-parquet
        - materialize-google-pubsub
        - materialize-google-sheets
        - materialize-mongodb
        - materialize-motherduck
        - materialize-mysql
        - materialize-pinecone
        - materialize-postgres
        - materialize-redshift
        - materialize-rockset
        - materialize-s3-csv
        - materialize-s3-iceberg
        - materialize-s3-parquet
        - materialize-snowflake
        - materialize-starburst
        - materialize-sqlite
        - materialize-webhook
        - materialize-sqlserver
      # Extra matrix entries flagged `python: true`; they are built from
      # python/Dockerfile and run the Poetry/pytest steps below.
      include:
        - connector: source-criteo
          connector_type: capture
          python: true
        - connector: source-shopify
          connector_type: capture
          python: true
  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0

    # Exposes three step outputs used below:
    #   tag      - first 7 characters of the commit SHA
    #   version  - contents of <connector>/VERSION with newlines removed
    #   variants - the connector name followed by the contents of
    #              <connector>/VARIANTS (if that file exists)
    # Outputs are appended to $GITHUB_OUTPUT; the `::set-output` workflow
    # command used previously is deprecated.
    - name: Prepare
      id: prep
      run: |
        TAG=$(echo $GITHUB_SHA | head -c7)
        echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
        VERSION=$(cat ${{ matrix.connector }}/VERSION | tr -d '\n')
        echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
        VARIANTS="${{ matrix.connector }} $(cat ${{ matrix.connector }}/VARIANTS 2>/dev/null || true)"
        # Deliberately unquoted: word splitting collapses any newlines in
        # VARIANTS into single spaces, so the output stays on one line as the
        # `name=value` format of $GITHUB_OUTPUT requires.
        echo variants=${VARIANTS} >> "$GITHUB_OUTPUT"

    - name: Download latest Flow release binaries and add them to $PATH
      run: |
        ./fetch-flow.sh
        echo "${PWD}/flow-bin" >> $GITHUB_PATH

    - name: Set up Cloud SDK
      uses: google-github-actions/setup-gcloud@v0
      with:
        project_id: ${{ secrets.GCP_PROJECT_ID }}
        service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
        export_default_credentials: true

    - name: Login to GitHub package docker registry
      run: |
        echo "${{ secrets.GITHUB_TOKEN }}" | \
          docker login --username ${{ github.actor }} --password-stdin ghcr.io

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v1
      with:
        driver-opts: |
          network=host

    - name: Create docker network flow-test
      run: docker network create flow-test

    # Only the connectors listed here bring up <connector>/docker-compose.yaml.
    - name: Start Dockerized test infrastructure
      if: |
        contains(fromJson('[
          "materialize-dynamodb",
          "materialize-elasticsearch",
          "source-dynamodb",
          "source-kinesis",
          "source-mysql",
          "source-postgres",
          "source-sftp",
          "source-sqlserver",
          "source-mongodb"
        ]'), matrix.connector)
      run: |
        set +e # Show logs even if startup fails
        docker compose --file ${{ matrix.connector }}/docker-compose.yaml up --wait
        docker logs ${{ matrix.connector }}-db-1

    - name: Test setup
      if: matrix.connector == 'source-mongodb'
      run: |
        bash source-mongodb/docker-compose-init.sh

    # Non-Python connectors: built from <connector>/Dockerfile on top of the
    # SHA-tagged base image pushed by build_base_image.
    - name: Build ${{ matrix.connector }} Docker Image
      uses: docker/build-push-action@v2
      if: ${{!matrix.python}}
      with:
        context: .
        file: ${{ matrix.connector }}/Dockerfile
        load: true
        build-args: BASE_IMAGE=ghcr.io/estuary/base-image:${{ steps.prep.outputs.tag }}
        tags: ghcr.io/estuary/${{ matrix.connector }}:local
        secrets: |
          "rockset_api_key=${{ secrets.ROCKSET_API_KEY }}"

    # Python connectors: built from the shared python/Dockerfile.
    - name: Build ${{ matrix.connector }} Python Docker Image
      uses: docker/build-push-action@v2
      if: ${{matrix.python}}
      with:
        context: .
        file: python/Dockerfile
        load: true
        build-args: |
          CONNECTOR_NAME=${{matrix.connector}}
          CONNECTOR_TYPE=${{matrix.connector_type}}
        tags: ghcr.io/estuary/${{ matrix.connector }}:local

    - name: Setup Python for ${{ matrix.connector }}
      uses: actions/setup-python@v5
      if: ${{matrix.python}}
      with:
        python-version: '3.11'

    - name: Install Poetry
      if: ${{matrix.python}}
      uses: snok/install-poetry@v1

    # Installs the connector's Poetry environment, activates it, then runs
    # pytest from the repository root against <connector>/tests.
    - name: Python connector ${{ matrix.connector }} snapshot tests
      if: ${{matrix.python}}
      run: |
        cd ${{matrix.connector}};
        poetry install;
        source $(poetry env info --path)/bin/activate
        cd ..;
        pytest ${{matrix.connector}}/tests;

    # source-kafka uses the shared infra/docker-compose.yaml instead of a
    # per-connector compose file.
    - name: Start Dockerized test infrastructure
      if: matrix.connector == 'source-kafka'
      run: |
        docker compose --file infra/docker-compose.yaml up --wait

    - name: Source connector ${{ matrix.connector }} integration tests
      if: |
        contains(fromJson('[
          "source-dynamodb",
          "source-gcs",
          "source-kinesis",
          "source-mysql",
          "source-postgres",
          "source-kafka",
          "source-s3",
          "source-sftp",
          "source-sqlserver"
        ]'), matrix.connector)
      env:
        GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
        ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
        MYSQL_DATABASE: test
      run: CONNECTOR=${{ matrix.connector }} VERSION=local ./tests/run.sh;

    # NOTE(review): the `uses:` reference on this step had been mangled by
    # e-mail obfuscation into an invalid value. It is restored here as
    # opt-nc/setup-duckdb-action@v1.0.8 - confirm the pinned version against
    # the repository history.
    - name: Install duckdb
      if: matrix.connector == 'materialize-motherduck' || matrix.connector == 'materialize-s3-iceberg'
      uses: opt-nc/setup-duckdb-action@v1.0.8
      with:
        version: v0.10.3

    - name: Materialization connector ${{ matrix.connector }} integration tests
      if: |
        contains(fromJson('[
          "materialize-dynamodb",
          "materialize-elasticsearch",
          "materialize-google-sheets",
          "materialize-pinecone",
          "materialize-postgres",
          "materialize-mysql",
          "materialize-sqlserver",
          "materialize-mongodb",
          "materialize-motherduck",
          "materialize-snowflake",
          "materialize-databricks",
          "materialize-bigquery",
          "materialize-redshift",
          "materialize-s3-iceberg"
        ]'), matrix.connector)
      env:
        GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
      run: CONNECTOR=${{ matrix.connector }} VERSION=local tests/materialize/run.sh;

    # Every variant is a thin image built from connector-variant.Dockerfile on
    # top of the locally built `:local` connector image. Runs for every event.
    - name: Push ${{ matrix.connector }} image(s) with commit SHA tag
      run: |
        for VARIANT in ${{ steps.prep.outputs.variants }}; do
          echo "Building and pushing ${VARIANT}:${{ steps.prep.outputs.tag }}...";
          docker build --build-arg BASE_CONNECTOR=ghcr.io/estuary/${{ matrix.connector }}:local \
            --build-arg DOCS_URL=https://go.estuary.dev/${VARIANT} \
            --tag ghcr.io/estuary/${VARIANT}:${{ steps.prep.outputs.tag }} \
            --file connector-variant.Dockerfile .;
          docker image push ghcr.io/estuary/${VARIANT}:${{ steps.prep.outputs.tag }};
        done

    # Push events only: re-tag the SHA-tagged images as `dev` and as the
    # connector version, then push both tags.
    - name: Push ${{ matrix.connector }} image(s) with 'dev' and '${{ steps.prep.outputs.version }}' tags
      if: ${{ github.event_name == 'push' }}
      run: |
        for VARIANT in ${{ steps.prep.outputs.variants }}; do
          docker image tag ghcr.io/estuary/${VARIANT}:${{ steps.prep.outputs.tag }} ghcr.io/estuary/${VARIANT}:dev;
          docker image tag ghcr.io/estuary/${VARIANT}:${{ steps.prep.outputs.tag }} ghcr.io/estuary/${VARIANT}:${{ steps.prep.outputs.version }};
          docker image push ghcr.io/estuary/${VARIANT}:dev;
          docker image push ghcr.io/estuary/${VARIANT}:${{ steps.prep.outputs.version }};
        done

    # `--yes` keeps the install non-interactive; without it the package
    # manager can stop at a confirmation prompt that nobody can answer in CI.
    - name: Install psql
      if: ${{ github.event_name == 'push' }}
      run: |
        sudo apt-get update
        sudo apt-get install --yes postgresql

    # Sets outputs.connector to 'true' when files under <connector>/ changed.
    - uses: dorny/paths-filter@v2
      id: filter
      with:
        filters: |
          connector:
            - "${{ matrix.connector }}/**"

    # Push events with changes under the connector directory only: mark the
    # `:dev` and `:<version>` rows in connector_tags as queued for each
    # variant. psql takes its connection settings from the PG* variables.
    - name: Refresh connector tags for ${{ matrix.connector }}
      if: github.event_name == 'push' && steps.filter.outputs.connector == 'true'
      env:
        PGHOST: ${{ secrets.POSTGRES_CONNECTOR_REFRESH_HOST }}
        PGUSER: ${{ secrets.POSTGRES_CONNECTOR_REFRESH_USER }}
        PGPASSWORD: ${{ secrets.POSTGRES_CONNECTOR_REFRESH_PASSWORD }}
        PGDATABASE: ${{ secrets.POSTGRES_CONNECTOR_REFRESH_DATABASE }}
      run: |
        for VARIANT in ${{ steps.prep.outputs.variants }}; do
          echo "UPDATE connector_tags SET job_status='{\"type\": \"queued\"}'
            WHERE connector_id IN (
              SELECT id FROM connectors WHERE image_name='ghcr.io/estuary/${VARIANT}'
            ) AND image_tag IN (':${{ steps.prep.outputs.version }}', ':dev');" | psql;
        done