Skip to content

Commit 3eec153

Browse files
authoredOct 22, 2024
Merge pull request #8 from morand-g/main
geoenrich 0.6.3
2 parents 5e26c5d + a48b2d6 commit 3eec153

File tree

10 files changed

+56
-19
lines changed

10 files changed

+56
-19
lines changed
 

‎.zenodo.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
"license": "GPL-3.0",
2020

21-
"title": "GeoEnrich v0.6.2: a new tool for scientists to painlessly enrich species occurrence data with environmental variables",
21+
"title": "GeoEnrich v0.6.3: a new tool for scientists to painlessly enrich species occurrence data with environmental variables",
2222

2323
"related_identifiers": [
2424
{

‎CHANGELOG.md

+10
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
## v0.6.3
2+
3+
#### New functions:
4+
- Added possibility to merge duplicate variables in geoenrich.exports.collate_npy
5+
- Added population density and distance to port
6+
7+
#### Bug fixes:
8+
- Fixed issue that occurred when degenerate dimensions are present in remote netcdf file
9+
10+
111
## v0.6.2
212

313
#### New functions:

‎README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# **geoenrich 0.6.2**
1+
# **geoenrich 0.6.3**
22

33
[![Read the Docs](https://img.shields.io/readthedocs/geoenrich)](https://geoenrich.readthedocs.io/en/latest/)
44
[![License](https://img.shields.io/github/license/morand-g/geoenrich?color=green)](https://github.com/morand-g/geoenrich/blob/main/LICENSE)

‎docs/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
author = 'Gaétan Morand (UMR Marbec)'
2626

2727
# The full version, including alpha/beta/rc tags
28-
release = '0.6.2'
28+
release = '0.6.3'
2929

3030

3131
# -- General configuration ---------------------------------------------------

‎docs/source/index.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
geoenrich 0.6.2 documentation
1+
geoenrich 0.6.3 documentation
22
==============================
33

44
|Read the Docs| |License| |PyPI| |Python versions| |Last commit| |DOI|

‎geoenrich/data/catalog.csv

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ current3d-u,Copernicus,0.25°,7d,1993-01-01 - 2022-12-28,50 levels,dataset-armor
55
current3d-v,Copernicus,0.25°,7d,1993-01-01 - 2022-12-28,50 levels,dataset-armor-3d-rep-weekly,vgo
66
diatoms,Copernicus,4km,1m,1997-09-04 - ongoing,surface,cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M,DIATO
77
dinophytes,Copernicus,4km,1m,1997-09-04 - ongoing,surface,cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M,DINO
8+
distance-to-port,Global Fishing Watch,0.01°,,,surface,http://tds.marbec-tools.ird.fr/thredds/dodsC/testAll/geoenrich/dist_port.nc,Band1
89
eke,,,,,,calculated,eke
910
fsle,Aviso+,0.04°,1d,1994-01-04 – ongoing,surface,https://tds.aviso.altimetry.fr/thredds/dodsC/dataset-duacs-dt-global-allsat-madt-fsle,fsle_max
1011
fsle-orientation,Aviso+,0.04°,1d,1994-01-04 – ongoing,surface,https://tds.aviso.altimetry.fr/thredds/dodsC/dataset-duacs-dt-global-allsat-madt-fsle,theta_max
@@ -20,6 +21,7 @@ organic-carbon3d,Copernicus,0.25°,7d,1998-01-01 - 2021-12-29,36 levels,cmems_ob
2021
oxygen,Copernicus,0.25°,1d,1993-01-01 - 2022-12-31,75 levels,cmems_mod_glo_bgc_my_0.25deg_P1D-m,o2
2122
ph,Copernicus,0.25°,1m,1985-01-01 - 2022-12-01,surface,dataset-carbon-rep-monthly,ph
2223
picophytoplankton,Copernicus,4km,1m,1997-09-04 - ongoing,surface,cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M,PICO
24+
pop-density-log,GPW v3v4,0.0083°,5y,1995-2020,surface,http://tds.marbec-tools.ird.fr/thredds/dodsC/testAll/geoenrich/pop_density3.nc,Band1
2325
primary-production,Copernicus,4km,1m,1997-09-04 - ongoing,surface,cmems_obs-oc_glo_bgc-pp_my_l4-multi-4km_P1M,PP
2426
prochlorophytes,Copernicus,4km,1m,1997-09-04 - ongoing,surface,cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M,PROCHLO
2527
prokaryotes,Copernicus,4km,1m,1997-09-04 - ongoing,surface,cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M,PROKAR

‎geoenrich/enrichment.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def enrich(dataset_ref, var_id, geo_buff = None, time_buff = None, depth_request
6363
var_id (str): ID of the variable to download.
6464
geo_buff (int): Geographic buffer for which to download data around occurrence point (kilometers).
6565
time_buff (float list): Time bounds for which to download data around occurrence day (days). For instance, time_buff = [-7, 0] will download data from 7 days before the occurrence to the occurrence date.
66-
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest lower available depth. Anything else downloads surface data.
66+
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest available depth. 'nearest_lower' -> closest lower available depth. Anything else downloads surface data.
6767
downsample (dict): Number of points to skip between each downloaded point, for each dimension, using its standard name as a key.
6868
slice (int tuple): Slice of the enrichment file to use for enrichment.
6969
maxpoints(int): Maximum number of points to download.
@@ -254,7 +254,7 @@ def enrich_download(geodf, varname, var_id, url, geo_buff, time_buff, depth_requ
254254
url (str): Dataset url (including credentials if needed).
255255
geo_buff (int): Geographic buffer for which to download data around occurrence point (kilometers).
256256
time_buff (float list): Time bounds for which to download data around occurrence day (days). For instance, time_buff = [-7, 0] will download data from 7 days before the occurrence to the occurrence date.
257-
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest lower available depth. Anything else downloads surface data.
257+
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest available depth. 'nearest_lower' -> closest lower available depth. Anything else downloads surface data.
258258
downsample (dict): Number of points to skip between each downloaded point, for each dimension, using its standard name as a key.
259259
maxpoints(int): Maximum number of points to download.
260260
force_download(bool): If True, download data regardless of cache status.
@@ -366,7 +366,7 @@ def enrich_copernicus(geodf, varname, var_id, dataset_id, geo_buff, time_buff, d
366366
dataset_id (str): Copernicus dataset ID.
367367
geo_buff (int): Geographic buffer for which to download data around occurrence point (kilometers).
368368
time_buff (float list): Time bounds for which to download data around occurrence day (days). For instance, time_buff = [-7, 0] will download data from 7 days before the occurrence to the occurrence date.
369-
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest lower available depth. Anything else downloads surface data.
369+
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest available depth. 'nearest_lower' -> closest lower available depth. Anything else downloads surface data.
370370
downsample (dict): Number of points to skip between each downloaded point, for each dimension, using its standard name as a key.
371371
maxpoints(int): Maximum number of points to download.
372372
force_download(bool): If True, download data regardless of cache status.
@@ -559,7 +559,7 @@ def row_enrich(row, remote_ds, local_ds, bool_ds, dimdict, var, depth_request, d
559559
bool_ds (netCDF4.Dataset): Local dataset recording whether data has already been downloaded.
560560
dimdict (dict): Dictionary of dimensions as returned by :func:`geoenrich.satellite.get_metadata`.
561561
var (dict): Variable dictionary as returned by :func:`geoenrich.satellite.get_metadata`.
562-
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest lower available depth. Anything else downloads surface data.
562+
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest available depth. 'nearest_lower' -> closest lower available depth. Anything else downloads surface data.
563563
downsample (dict): Number of points to skip between each downloaded point, for each dimension, using its standard name as a key.
564564
force_download(bool): If True, download data regardless of cache status.
565565
Returns:
@@ -679,7 +679,7 @@ def calculate_indices(row, dimdict, var, depth_request, downsample):
679679
row (pandas.Series): GeoDataFrame row to enrich.
680680
dimdict (dict): Dictionary of dimensions as returned by geoenrich.satellite.get_metadata.
681681
var (dict): Variable dictionary as returned by geoenrich.satellite.get_metadata.
682-
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest lower available depth. Anything else downloads surface data.
682+
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest available depth. 'nearest_lower' -> closest lower available depth. Anything else downloads surface data.
683683
downsample (dict): Number of points to skip between each downloaded point, for each dimension, using its standard name as a key.
684684
Returns:
685685
dict: Dictionary of indices for each dimension (keys are standard dimension names).
@@ -727,12 +727,16 @@ def calculate_indices(row, dimdict, var, depth_request, downsample):
727727
# if depth is a dimension, select surface layer, nearest lower value or everything
728728

729729
if ('depth' in dimdict) and (dimdict['depth']['name'] in var['params']):
730-
if depth_request == 'nearest' and pd.notna(row['bestz']):
730+
if depth_request == 'nearest_lower' and pd.notna(row['bestz']):
731731
diffs = (row['bestz'] - dimdict['depth']['vals']).astype('float')
732732
diffs[diffs < 0] = np.nan
733733
d1 = np.nanargmin(diffs)
734734
ind['depth'] = {'min': d1, 'max': d1, 'best': d1, 'step': 1}
735735

736+
elif depth_request == 'nearest' and pd.notna(row['bestz']):
737+
d1 = np.argmin( np.abs(row['bestz'] - dimdict['depth']['vals'] ) )
738+
ind['depth'] = {'min': d1, 'max': d1, 'best': d1, 'step': 1}
739+
736740
elif depth_request == 'all':
737741
ind['depth'] = {'min': 0, 'max': len(dimdict['depth']['vals']) - 1, 'best': None, 'step': 1}
738742

@@ -1087,7 +1091,7 @@ def get_enrichment_id(enrichments, var_id, geo_buff, time_buff, depth_request, d
10871091
var_id (str): ID of the variable to download.
10881092
geo_buff (int): Geographic buffer for which to download data around occurrence point (kilometers).
10891093
time_buff (float list): Time bounds for which to download data around occurrence day (days). For instance, time_buff = [-7, 0] will download data from 7 days before the occurrence to the occurrence date.
1090-
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest lower available depth. Anything else downloads surface data.
1094+
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest available depth. 'nearest_lower' -> closest lower available depth. Anything else downloads surface data.
10911095
downsample (dict): Number of points to skip between each downloaded point, for each dimension, using its standard name as a key.
10921096
10931097
Returns:
@@ -1124,7 +1128,7 @@ def save_enrichment_config(dataset_ref, enrichment_id, var_id, geo_buff, time_bu
11241128
var_id (str): ID of the variable to download.
11251129
geo_buff (int): Geographic buffer for which to download data around occurrence point (kilometers).
11261130
time_buff (float list): Time bounds for which to download data around occurrence day (days). For instance, time_buff = [-7, 0] will download data from 7 days before the occurrence to the occurrence date.
1127-
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest lower available depth. Anything else downloads surface data.
1131+
depth_request (str): For 4D data: 'all' -> data for all depths. 'nearest' -> closest available depth. 'nearest_lower' -> closest lower available depth. Anything else downloads surface data.
11281132
downsample (dict): Number of points to skip between each downloaded point, for each dimension, using its standard name as a key.
11291133
Returns:
11301134
None

‎geoenrich/exports.py

+25-5
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,7 @@ def export_raster(dataset_ref, occ_id, var_id, path = Path('./'), geo_buff = Non
665665
print('Abort. Array is smaller than 2x2 pixels.')
666666

667667

668-
def collate_npy(ds_ref, data_path, output_res = 32, slice = None, dimension3 = {'example-var': 2}):
668+
def collate_npy(ds_ref, data_path, output_res = 32, slice = None, dimension3 = {'example-var': 2}, duplicates = {'var_to_remove':'var_to_keep'}):
669669

670670
"""
671671
Export a 3D numpy array with all layers for each occurrence of a dataset.
@@ -676,7 +676,9 @@ def collate_npy(ds_ref, data_path, output_res = 32, slice = None, dimension3 = {
676676
data_path (str): path where numpy files will be saved.
677677
output_res (int) : output data resolution along lat and lon axes.
678678
slice (list[int]): if not None, only process the given slice of the dataset.
679-
dimension3: provides the expected 3rd dimension length (time dimension * depth dimension) for each variable where it is larger than 1.
679+
dimension3 (dict): provides the expected 3rd dimension length (time dimension * depth dimension) for each variable where it is larger than 1.
680+
duplicates (dict): dictionary of variables which should be merged. If var_to_keep is empty, data from var_to_remove are used instead.
681+
680682
681683
Returns:
682684
None
@@ -717,8 +719,12 @@ def collate_npy(ds_ref, data_path, output_res = 32, slice = None, dimension3 = {
717719

718720
# Export np arrays for each occurrence
719721

722+
var_list = [en['parameters']['var_id'] for en in enrichments]
723+
for v in duplicates.keys():
724+
var_list.remove(v)
725+
720726
for occ_id in tqdm(ids):
721-
all_bands = []
727+
all_bands = {}
722728
for en in enrichments:
723729

724730
params = en['parameters']
@@ -742,12 +748,26 @@ def collate_npy(ds_ref, data_path, output_res = 32, slice = None, dimension3 = {
742748
stack = True,
743749
squeeze = False,
744750
target_len = target_len)
745-
all_bands.append(band)
751+
all_bands[var_id] = band
752+
753+
# replace missing values with value from duplicate variable; and remove said duplicates
754+
for to_rem in duplicates:
755+
if np.isnan(all_bands[duplicates[to_rem]]).all():
756+
all_bands[duplicates[to_rem]] = all_bands[to_rem]
757+
all_bands.pop(to_rem)
758+
759+
var_data = [all_bands[k] for k in var_list]
746760

747-
to_save = np.concatenate(all_bands, -1)
761+
to_save = np.concatenate(var_data, -1)
748762
np.save(folderpath / (str(occ_id) + '.npy'), to_save)
749763

750764

765+
with open(folderpath / '0000_npy_metadata.txt', 'w') as f:
766+
for line in var_list:
767+
f.write(f"{line}\n")
768+
769+
770+
751771
# close NC datasets
752772

753773
for en in enrichments:

‎geoenrich/satellite.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ def create_nc(var):
246246
bool_ds = nc.Dataset(str(pathd), mode = 'w')
247247

248248
for name, dimension in remote_ds.dimensions.items():
249-
if getattr(remote_ds.variables[name], 'standard_name', 'Unknown') == 'time' or name in ['time', 'time_agg']:
249+
if name in remote_ds.variables and \
250+
(getattr(remote_ds.variables[name], 'standard_name', 'Unknown') == 'time' or name in ['time', 'time_agg']):
250251
local_ds.createDimension(name, None)
251252
bool_ds.createDimension(name, None)
252253
else:

‎setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = geoenrich
3-
version = 0.6.2
3+
version = 0.6.3
44
author = Gaétan Morand (UMR Marbec)
55
author_email = gaetan.morand@ird.fr
66
description = A package to enrich your geo-referenced data (e.g. species occurrences) with environmental data.

0 commit comments

Comments
 (0)
Please sign in to comment.