Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data engineering profils et groupes de navires #4158

Merged
merged 19 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ frontend/cypress/videos/
/datascience/src/pipeline/data/non_commited_data/pno_segments_subscriptions.csv
/datascience/src/pipeline/data/non_commited_data/pno_vessels_subscriptions.csv

# Ignore downloaded external repositories
datascience/tests/test_data/external/*
!datascience/tests/test_data/external/README.md

# Kotlin sessions
.kotlin/

Expand Down
29 changes: 26 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Absolute-path helpers for volume mounts and migration discovery.
# Use := (simple expansion) and $(CURDIR) instead of recursive = with
# $(shell pwd): the recursive form forked a shell on EVERY expansion of
# each variable. $(CURDIR) is make's built-in absolute working directory.
# NOTE(review): INFRA_FOLDER deliberately keeps the embedded quotes of the
# original value — consumers elsewhere in the file may rely on them.
INFRA_FOLDER := "$(CURDIR)/infra/configurations/"
HOST_MIGRATIONS_FOLDER := $(CURDIR)/backend/src/main/resources/db/migration
DATA_WAREHOUSE_INPUT_DATA_FOLDER := $(CURDIR)/datascience/tests/test_data/clickhouse_user_files
EXTERNAL_DATA_FOLDER := $(CURDIR)/datascience/tests/test_data/external

# Run recipes with bash, exiting on the first failing command (-e).
SHELL := /bin/bash
.SHELLFLAGS := -ec
Expand Down Expand Up @@ -270,8 +272,8 @@ docker-compose-puppeteer-up: docker-env

# Build the data-pipeline docker image, tagged with the current $(VERSION).
docker-build-pipeline:
	docker build -f ./infra/docker/datapipeline/Dockerfile . -t monitorfish-pipeline:$(VERSION)
# Run the pipeline test suite inside the docker image.
# Prerequisites clone the external Data Warehouse repo and start the
# ClickHouse test stack; the external folder is bind-mounted into the
# container and excluded from pytest collection (--ignore), since it holds
# a foreign repository, not this project's tests.
# NOTE(review): the scraped diff left both the old and new rule bodies in
# place, which defines the target twice ("overriding recipe" warning);
# only the current rule is kept here.
.PHONY: docker-test-pipeline
docker-test-pipeline: fetch-external-data run-data-warehouse
	docker run --network host -v $(EXTERNAL_DATA_FOLDER):/home/monitorfish-pipeline/datascience/tests/test_data/external -v /var/run/docker.sock:/var/run/docker.sock -u monitorfish-pipeline:$(DOCKER_GROUP) --env-file datascience/.env.test --env HOST_MIGRATIONS_FOLDER=$(HOST_MIGRATIONS_FOLDER) monitorfish-pipeline:$(VERSION) coverage run -m pytest --pdb --ignore=tests/test_data/external tests
# Re-tag the local pipeline image for the GitHub Packages registry.
docker-tag-pipeline:
	docker tag monitorfish-pipeline:$(VERSION) docker.pkg.github.com/mtes-mct/monitorfish/monitorfish-pipeline:$(VERSION)
docker-push-pipeline:
Expand All @@ -284,8 +286,29 @@ docker-push-pipeline:

# Install the datascience package and its dependencies with poetry.
install-pipeline:
	cd datascience && poetry install

# Tear down the ClickHouse test stack and delete its volumes (-v).
# Credentials match the test fixtures in datascience/.env.test; the env-var
# prefix makes them visible to docker compose for ${...} substitution,
# exactly as the previous export chain did.
stop-data-warehouse:
	DATA_WAREHOUSE_PASSWORD=password \
	DATA_WAREHOUSE_USER=clickhouse_user \
	DATA_WAREHOUSE_INPUT_DATA_FOLDER=$(DATA_WAREHOUSE_INPUT_DATA_FOLDER) \
	docker compose -f ./datascience/tests/docker-compose.yml down -v

# Shallow-clone the Data Warehouse repository into the external test-data
# folder (ignored by git; see .gitignore).
# The existing-checkout case is detected with an explicit directory test:
# the previous `git clone ... || echo` reported ANY clone failure (network
# error, missing branch) as "already present", hiding real errors.
.PHONY: fetch-external-data
fetch-external-data:
	if [ -d $(EXTERNAL_DATA_FOLDER)/data_warehouse ]; then \
		echo "Data Warehouse repository already present - skipping git clone"; \
	else \
		git clone --depth=1 --branch=main https://github.com/MTES-MCT/fisheries-and-environment-data-warehouse.git $(EXTERNAL_DATA_FOLDER)/data_warehouse; \
	fi

# Delete the cloned Data Warehouse repository.
# Uses $(EXTERNAL_DATA_FOLDER) instead of the previous hard-coded relative
# path so the location is defined in exactly one place; the variable is
# simply-expanded from $(CURDIR) and therefore never empty.
.PHONY: erase-external-data
erase-external-data:
	rm -rf $(EXTERNAL_DATA_FOLDER)/data_warehouse

# Start the ClickHouse test stack in the background (-d), removing any
# leftover containers from previous runs (--remove-orphans).
# Credentials match datascience/.env.test; the env-var prefix exposes them
# to docker compose for ${...} substitution, identically to the previous
# export chain.
run-data-warehouse:
	DATA_WAREHOUSE_PASSWORD=password \
	DATA_WAREHOUSE_USER=clickhouse_user \
	DATA_WAREHOUSE_INPUT_DATA_FOLDER=$(DATA_WAREHOUSE_INPUT_DATA_FOLDER) \
	docker compose -f ./datascience/tests/docker-compose.yml up -d --remove-orphans

# Run the pipeline tests locally with coverage (terminal report + HTML).
# tests/test_data/external holds the cloned Data Warehouse repository and
# must be excluded from pytest collection (--ignore).
# NOTE(review): the scraped diff left both the old and new recipe lines in
# place, which would have run pytest twice; only the current line is kept.
.PHONY: test-pipeline
test-pipeline:
	cd datascience && export TEST_LOCAL=True && poetry run coverage run -m pytest --pdb --ignore=tests/test_data/external tests/ && poetry run coverage report && poetry run coverage html

test-pipeline-with-data_warehouse: fetch-external-data run-data-warehouse test-pipeline stop-data-warehouse

# ----------------------------------------------------------
# Remote: Database commands
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Vessel profiles including the share (by weight) of catches made per gear, species, fao area, segment, landing_port...
CREATE TABLE IF NOT EXISTS public.vessel_profiles (
-- Vessel identifier; presumably the EU CFR number — confirm against the pipeline code.
cfr VARCHAR NOT NULL,
-- All-time share maps (key -> share of catches by weight, per the table comment above).
gears JSONB,
species JSONB,
fao_areas JSONB,
segments JSONB,
landing_ports JSONB,
-- Same share maps restricted to a recent time window; the window length is not
-- visible here — TODO confirm in the generating flow.
recent_gears JSONB,
recent_species JSONB,
recent_fao_areas JSONB,
recent_segments JSONB,
recent_landing_ports JSONB,
-- Most recent landing port and its facade; `facade` is a custom type,
-- presumably a project-defined enum — defined elsewhere in the schema.
latest_landing_port VARCHAR,
latest_landing_facade facade
);
5 changes: 5 additions & 0 deletions datascience/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ MONITORFISH_LOCAL_NAME=
MONITORFISH_LOCAL_USER=
MONITORFISH_LOCAL_PWD=

DATA_WAREHOUSE_HOST=
DATA_WAREHOUSE_PORT=
DATA_WAREHOUSE_USER=
DATA_WAREHOUSE_PWD=

# Proxies to use when accessing the Internet
HTTP_PROXY_=
HTTPS_PROXY_=
Expand Down
5 changes: 5 additions & 0 deletions datascience/.env.test
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ MONITORFISH_LOCAL_NAME=db_name
MONITORFISH_LOCAL_USER=db_user
MONITORFISH_LOCAL_PWD=db_pwd

DATA_WAREHOUSE_HOST=0.0.0.0
DATA_WAREHOUSE_PORT=8123
DATA_WAREHOUSE_USER=clickhouse_user
DATA_WAREHOUSE_PWD=password

# Proxy settings
HTTPS_PROXY_=http://some.ip.address:port
HTTP_PROXY_=http://some.ip.address:port
Expand Down
Loading